<?xml version="1.0" ?>
<root date="2021-06-04">
  <extension name="3DNOW">
    <instruction asm="FEMMS" category="MMX" cpl="3" extension="3DNOW" iclass="FEMMS" iform="FEMMS" isa-set="3DNOW" string="FEMMS" url="uops.info/html-instr/FEMMS.html"/>
    <instruction asm="PAVGUSB" category="3DNOW" cpl="3" extension="3DNOW" iclass="PAVGUSB" iform="PAVGUSB_MMXq_MEMq" isa-set="3DNOW" string="PAVGUSB (MM, M64)" url="uops.info/html-instr/PAVGUSB_MM_M64.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="i64"/>
    </instruction>
    <instruction asm="PAVGUSB" category="3DNOW" cpl="3" extension="3DNOW" iclass="PAVGUSB" iform="PAVGUSB_MMXq_MMXq" isa-set="3DNOW" string="PAVGUSB (MM, MM)" url="uops.info/html-instr/PAVGUSB_MM_MM.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
    </instruction>
    <instruction asm="PF2ID" category="3DNOW" cpl="3" extension="3DNOW" iclass="PF2ID" iform="PF2ID_MMXq_MEMq" isa-set="3DNOW" string="PF2ID (MM, M64)" url="uops.info/html-instr/PF2ID_MM_M64.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="i64"/>
    </instruction>
    <instruction asm="PF2ID" category="3DNOW" cpl="3" extension="3DNOW" iclass="PF2ID" iform="PF2ID_MMXq_MMXq" isa-set="3DNOW" string="PF2ID (MM, MM)" url="uops.info/html-instr/PF2ID_MM_MM.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
    </instruction>
    <instruction asm="PF2IW" category="3DNOW" cpl="3" extension="3DNOW" iclass="PF2IW" iform="PF2IW_MMXq_MEMq" isa-set="3DNOW" string="PF2IW (MM, M64)" url="uops.info/html-instr/PF2IW_MM_M64.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="i64"/>
    </instruction>
    <instruction asm="PF2IW" category="3DNOW" cpl="3" extension="3DNOW" iclass="PF2IW" iform="PF2IW_MMXq_MMXq" isa-set="3DNOW" string="PF2IW (MM, MM)" url="uops.info/html-instr/PF2IW_MM_MM.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
    </instruction>
    <instruction asm="PFACC" category="3DNOW" cpl="3" extension="3DNOW" iclass="PFACC" iform="PFACC_MMXq_MEMq" isa-set="3DNOW" string="PFACC (MM, M64)" url="uops.info/html-instr/PFACC_MM_M64.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="i64"/>
    </instruction>
    <instruction asm="PFACC" category="3DNOW" cpl="3" extension="3DNOW" iclass="PFACC" iform="PFACC_MMXq_MMXq" isa-set="3DNOW" string="PFACC (MM, MM)" url="uops.info/html-instr/PFACC_MM_MM.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
    </instruction>
    <instruction asm="PFADD" category="3DNOW" cpl="3" extension="3DNOW" iclass="PFADD" iform="PFADD_MMXq_MEMq" isa-set="3DNOW" string="PFADD (MM, M64)" url="uops.info/html-instr/PFADD_MM_M64.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="i64"/>
    </instruction>
    <instruction asm="PFADD" category="3DNOW" cpl="3" extension="3DNOW" iclass="PFADD" iform="PFADD_MMXq_MMXq" isa-set="3DNOW" string="PFADD (MM, MM)" url="uops.info/html-instr/PFADD_MM_MM.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
    </instruction>
    <instruction asm="PFCMPEQ" category="3DNOW" cpl="3" extension="3DNOW" iclass="PFCMPEQ" iform="PFCMPEQ_MMXq_MEMq" isa-set="3DNOW" string="PFCMPEQ (MM, M64)" url="uops.info/html-instr/PFCMPEQ_MM_M64.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="i64"/>
    </instruction>
    <instruction asm="PFCMPEQ" category="3DNOW" cpl="3" extension="3DNOW" iclass="PFCMPEQ" iform="PFCMPEQ_MMXq_MMXq" isa-set="3DNOW" string="PFCMPEQ (MM, MM)" url="uops.info/html-instr/PFCMPEQ_MM_MM.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
    </instruction>
    <instruction asm="PFCMPGE" category="3DNOW" cpl="3" extension="3DNOW" iclass="PFCMPGE" iform="PFCMPGE_MMXq_MEMq" isa-set="3DNOW" string="PFCMPGE (MM, M64)" url="uops.info/html-instr/PFCMPGE_MM_M64.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="i64"/>
    </instruction>
    <instruction asm="PFCMPGE" category="3DNOW" cpl="3" extension="3DNOW" iclass="PFCMPGE" iform="PFCMPGE_MMXq_MMXq" isa-set="3DNOW" string="PFCMPGE (MM, MM)" url="uops.info/html-instr/PFCMPGE_MM_MM.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
    </instruction>
    <instruction asm="PFCMPGT" category="3DNOW" cpl="3" extension="3DNOW" iclass="PFCMPGT" iform="PFCMPGT_MMXq_MEMq" isa-set="3DNOW" string="PFCMPGT (MM, M64)" url="uops.info/html-instr/PFCMPGT_MM_M64.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="i64"/>
    </instruction>
    <instruction asm="PFCMPGT" category="3DNOW" cpl="3" extension="3DNOW" iclass="PFCMPGT" iform="PFCMPGT_MMXq_MMXq" isa-set="3DNOW" string="PFCMPGT (MM, MM)" url="uops.info/html-instr/PFCMPGT_MM_MM.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
    </instruction>
    <instruction asm="PFMAX" category="3DNOW" cpl="3" extension="3DNOW" iclass="PFMAX" iform="PFMAX_MMXq_MEMq" isa-set="3DNOW" string="PFMAX (MM, M64)" url="uops.info/html-instr/PFMAX_MM_M64.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="i64"/>
    </instruction>
    <instruction asm="PFMAX" category="3DNOW" cpl="3" extension="3DNOW" iclass="PFMAX" iform="PFMAX_MMXq_MMXq" isa-set="3DNOW" string="PFMAX (MM, MM)" url="uops.info/html-instr/PFMAX_MM_MM.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
    </instruction>
    <instruction asm="PFMIN" category="3DNOW" cpl="3" extension="3DNOW" iclass="PFMIN" iform="PFMIN_MMXq_MEMq" isa-set="3DNOW" string="PFMIN (MM, M64)" url="uops.info/html-instr/PFMIN_MM_M64.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="i64"/>
    </instruction>
    <instruction asm="PFMIN" category="3DNOW" cpl="3" extension="3DNOW" iclass="PFMIN" iform="PFMIN_MMXq_MMXq" isa-set="3DNOW" string="PFMIN (MM, MM)" url="uops.info/html-instr/PFMIN_MM_MM.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
    </instruction>
    <instruction asm="PFMUL" category="3DNOW" cpl="3" extension="3DNOW" iclass="PFMUL" iform="PFMUL_MMXq_MEMq" isa-set="3DNOW" string="PFMUL (MM, M64)" url="uops.info/html-instr/PFMUL_MM_M64.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="i64"/>
    </instruction>
    <instruction asm="PFMUL" category="3DNOW" cpl="3" extension="3DNOW" iclass="PFMUL" iform="PFMUL_MMXq_MMXq" isa-set="3DNOW" string="PFMUL (MM, MM)" url="uops.info/html-instr/PFMUL_MM_MM.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
    </instruction>
    <instruction asm="PFNACC" category="3DNOW" cpl="3" extension="3DNOW" iclass="PFNACC" iform="PFNACC_MMXq_MEMq" isa-set="3DNOW" string="PFNACC (MM, M64)" url="uops.info/html-instr/PFNACC_MM_M64.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="i64"/>
    </instruction>
    <instruction asm="PFNACC" category="3DNOW" cpl="3" extension="3DNOW" iclass="PFNACC" iform="PFNACC_MMXq_MMXq" isa-set="3DNOW" string="PFNACC (MM, MM)" url="uops.info/html-instr/PFNACC_MM_MM.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
    </instruction>
    <instruction asm="PFPNACC" category="3DNOW" cpl="3" extension="3DNOW" iclass="PFPNACC" iform="PFPNACC_MMXq_MEMq" isa-set="3DNOW" string="PFPNACC (MM, M64)" url="uops.info/html-instr/PFPNACC_MM_M64.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="i64"/>
    </instruction>
    <instruction asm="PFPNACC" category="3DNOW" cpl="3" extension="3DNOW" iclass="PFPNACC" iform="PFPNACC_MMXq_MMXq" isa-set="3DNOW" string="PFPNACC (MM, MM)" url="uops.info/html-instr/PFPNACC_MM_MM.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
    </instruction>
    <instruction asm="PFRCP" category="3DNOW" cpl="3" extension="3DNOW" iclass="PFRCP" iform="PFRCP_MMXq_MEMq" isa-set="3DNOW" string="PFRCP (MM, M64)" url="uops.info/html-instr/PFRCP_MM_M64.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="i64"/>
    </instruction>
    <instruction asm="PFRCP" category="3DNOW" cpl="3" extension="3DNOW" iclass="PFRCP" iform="PFRCP_MMXq_MMXq" isa-set="3DNOW" string="PFRCP (MM, MM)" url="uops.info/html-instr/PFRCP_MM_MM.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
    </instruction>
    <instruction asm="PFRCPIT1" category="3DNOW" cpl="3" extension="3DNOW" iclass="PFRCPIT1" iform="PFRCPIT1_MMXq_MEMq" isa-set="3DNOW" string="PFRCPIT1 (MM, M64)" url="uops.info/html-instr/PFRCPIT1_MM_M64.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="i64"/>
    </instruction>
    <instruction asm="PFRCPIT1" category="3DNOW" cpl="3" extension="3DNOW" iclass="PFRCPIT1" iform="PFRCPIT1_MMXq_MMXq" isa-set="3DNOW" string="PFRCPIT1 (MM, MM)" url="uops.info/html-instr/PFRCPIT1_MM_MM.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
    </instruction>
    <instruction asm="PFRCPIT2" category="3DNOW" cpl="3" extension="3DNOW" iclass="PFRCPIT2" iform="PFRCPIT2_MMXq_MEMq" isa-set="3DNOW" string="PFRCPIT2 (MM, M64)" url="uops.info/html-instr/PFRCPIT2_MM_M64.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="i64"/>
    </instruction>
    <instruction asm="PFRCPIT2" category="3DNOW" cpl="3" extension="3DNOW" iclass="PFRCPIT2" iform="PFRCPIT2_MMXq_MMXq" isa-set="3DNOW" string="PFRCPIT2 (MM, MM)" url="uops.info/html-instr/PFRCPIT2_MM_MM.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
    </instruction>
    <instruction asm="PFRSQIT1" category="3DNOW" cpl="3" extension="3DNOW" iclass="PFRSQIT1" iform="PFRSQIT1_MMXq_MEMq" isa-set="3DNOW" string="PFRSQIT1 (MM, M64)" url="uops.info/html-instr/PFRSQIT1_MM_M64.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="i64"/>
    </instruction>
    <instruction asm="PFRSQIT1" category="3DNOW" cpl="3" extension="3DNOW" iclass="PFRSQIT1" iform="PFRSQIT1_MMXq_MMXq" isa-set="3DNOW" string="PFRSQIT1 (MM, MM)" url="uops.info/html-instr/PFRSQIT1_MM_MM.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
    </instruction>
    <instruction asm="PFRSQRT" category="3DNOW" cpl="3" extension="3DNOW" iclass="PFRSQRT" iform="PFRSQRT_MMXq_MEMq" isa-set="3DNOW" string="PFRSQRT (MM, M64)" url="uops.info/html-instr/PFRSQRT_MM_M64.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="i64"/>
    </instruction>
    <instruction asm="PFRSQRT" category="3DNOW" cpl="3" extension="3DNOW" iclass="PFRSQRT" iform="PFRSQRT_MMXq_MMXq" isa-set="3DNOW" string="PFRSQRT (MM, MM)" url="uops.info/html-instr/PFRSQRT_MM_MM.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
    </instruction>
    <instruction asm="PFSUB" category="3DNOW" cpl="3" extension="3DNOW" iclass="PFSUB" iform="PFSUB_MMXq_MEMq" isa-set="3DNOW" string="PFSUB (MM, M64)" url="uops.info/html-instr/PFSUB_MM_M64.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="i64"/>
    </instruction>
    <instruction asm="PFSUB" category="3DNOW" cpl="3" extension="3DNOW" iclass="PFSUB" iform="PFSUB_MMXq_MMXq" isa-set="3DNOW" string="PFSUB (MM, MM)" url="uops.info/html-instr/PFSUB_MM_MM.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
    </instruction>
    <instruction asm="PFSUBR" category="3DNOW" cpl="3" extension="3DNOW" iclass="PFSUBR" iform="PFSUBR_MMXq_MEMq" isa-set="3DNOW" string="PFSUBR (MM, M64)" url="uops.info/html-instr/PFSUBR_MM_M64.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="i64"/>
    </instruction>
    <instruction asm="PFSUBR" category="3DNOW" cpl="3" extension="3DNOW" iclass="PFSUBR" iform="PFSUBR_MMXq_MMXq" isa-set="3DNOW" string="PFSUBR (MM, MM)" url="uops.info/html-instr/PFSUBR_MM_MM.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
    </instruction>
    <instruction asm="PI2FD" category="3DNOW" cpl="3" extension="3DNOW" iclass="PI2FD" iform="PI2FD_MMXq_MEMq" isa-set="3DNOW" string="PI2FD (MM, M64)" url="uops.info/html-instr/PI2FD_MM_M64.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="i64"/>
    </instruction>
    <instruction asm="PI2FD" category="3DNOW" cpl="3" extension="3DNOW" iclass="PI2FD" iform="PI2FD_MMXq_MMXq" isa-set="3DNOW" string="PI2FD (MM, MM)" url="uops.info/html-instr/PI2FD_MM_MM.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
    </instruction>
    <instruction asm="PI2FW" category="3DNOW" cpl="3" extension="3DNOW" iclass="PI2FW" iform="PI2FW_MMXq_MEMq" isa-set="3DNOW" string="PI2FW (MM, M64)" url="uops.info/html-instr/PI2FW_MM_M64.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="i64"/>
    </instruction>
    <instruction asm="PI2FW" category="3DNOW" cpl="3" extension="3DNOW" iclass="PI2FW" iform="PI2FW_MMXq_MMXq" isa-set="3DNOW" string="PI2FW (MM, MM)" url="uops.info/html-instr/PI2FW_MM_MM.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
    </instruction>
    <instruction asm="PMULHRW" category="3DNOW" cpl="3" extension="3DNOW" iclass="PMULHRW" iform="PMULHRW_MMXq_MEMq" isa-set="3DNOW" string="PMULHRW (MM, M64)" url="uops.info/html-instr/PMULHRW_MM_M64.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="i64"/>
    </instruction>
    <instruction asm="PMULHRW" category="3DNOW" cpl="3" extension="3DNOW" iclass="PMULHRW" iform="PMULHRW_MMXq_MMXq" isa-set="3DNOW" string="PMULHRW (MM, MM)" url="uops.info/html-instr/PMULHRW_MM_MM.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
    </instruction>
    <instruction asm="PSWAPD" category="3DNOW" cpl="3" extension="3DNOW" iclass="PSWAPD" iform="PSWAPD_MMXq_MEMq" isa-set="3DNOW" string="PSWAPD (MM, M64)" url="uops.info/html-instr/PSWAPD_MM_M64.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="i64"/>
    </instruction>
    <instruction asm="PSWAPD" category="3DNOW" cpl="3" extension="3DNOW" iclass="PSWAPD" iform="PSWAPD_MMXq_MMXq" isa-set="3DNOW" string="PSWAPD (MM, MM)" url="uops.info/html-instr/PSWAPD_MM_MM.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="i64">MM0,MM1,MM2,MM3,MM4,MM5,MM6,MM7</operand>
    </instruction>
  </extension>
  <extension name="3DNOW_PREFETCH">
    <instruction asm="PREFETCHW" category="PREFETCH" cpl="3" extension="3DNOW_PREFETCH" iclass="PREFETCHW" iform="PREFETCHW_0F0Dr1" isa-set="PREFETCH_NOP" string="PREFETCHW (M512)" summary="Prefetch Data into Caches in Anticipation of a Write" url="uops.info/html-instr/PREFETCHW_M512.html" url-ref="felixcloutier.com/x86/PREFETCHW.html">
      <operand idx="1" memory-prefix="zmmword ptr" name="MEM0" r="1" type="mem" width="512" xtype="i64"/>
      <architecture name="CON">
        <measurement TP_loop="3.00" TP_ports="0.50" TP_unrolled="3.00" ports="1*p15" uops="1"/>
      </architecture>
      <architecture name="WOL">
        <measurement TP_loop="3.00" TP_ports="0.50" TP_unrolled="3.00" ports="1*p15" uops="1"/>
      </architecture>
      <architecture name="NHM">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MS="0" uops_retire_slots="1"/>
      </architecture>
      <architecture name="WSM">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MS="0" uops_retire_slots="1"/>
      </architecture>
      <architecture name="SNB">
        <measurement TP_loop="0.25" TP_unrolled="0.25" uops="0" uops_MITE="1" uops_MS="0" uops_retire_slots="1"/>
      </architecture>
      <architecture name="IVB">
        <measurement TP_loop="0.25" TP_unrolled="0.25" uops="0" uops_MITE="1" uops_MS="0" uops_retire_slots="1"/>
      </architecture>
      <architecture name="HSW">
        <measurement TP_loop="0.25" TP_unrolled="0.25" uops="0" uops_MITE="1" uops_MS="0" uops_retire_slots="1"/>
      </architecture>
      <architecture name="BDW">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1"/>
      </architecture>
      <architecture name="SKL">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1"/>
      </architecture>
      <architecture name="SKX">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1"/>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1"/>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1"/>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1"/>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1"/>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1"/>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1"/>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.33" TP_unrolled="0.33" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="PREFETCH" category="PREFETCH" cpl="3" extension="3DNOW_PREFETCH" iclass="PREFETCH_EXCLUSIVE" iform="PREFETCH_EXCLUSIVE_MEMmprefetch" isa-set="PREFETCH_NOP" string="PREFETCH_EXCLUSIVE (M512)" url="uops.info/html-instr/PREFETCH_EXCLUSIVE_M512.html">
      <operand idx="1" memory-prefix="zmmword ptr" name="MEM0" r="1" type="mem" width="512" xtype="i64"/>
      <architecture name="CON">
        <measurement TP_loop="3.00" TP_ports="0.50" TP_unrolled="3.00" ports="1*p15" uops="1"/>
      </architecture>
      <architecture name="WOL">
        <measurement TP_loop="3.00" TP_ports="0.50" TP_unrolled="3.00" ports="1*p15" uops="1"/>
      </architecture>
      <architecture name="NHM">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="4" ports="1*p2" ports_indexed="1*p2" uops="1" uops_indexed="1" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p2" ports_indexed="1*p2" uops="1" uops_indexed="1" version="2.2"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MS="0" uops_retire_slots="1"/>
      </architecture>
      <architecture name="WSM">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="4" ports="1*p2" ports_indexed="1*p2" uops="1" uops_indexed="1" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p2" ports_indexed="1*p2" uops="1" uops_indexed="1" version="2.2"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MS="0" uops_retire_slots="1"/>
      </architecture>
      <architecture name="SNB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="5" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <measurement TP_loop="0.25" TP_unrolled="0.25" uops="0" uops_MITE="1" uops_MS="0" uops_retire_slots="1"/>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="5" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <measurement TP_loop="0.25" TP_unrolled="0.25" uops="0" uops_MITE="1" uops_MS="0" uops_retire_slots="1"/>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="5" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.1"/>
        <measurement TP_loop="0.25" TP_unrolled="0.25" uops="0" uops_MITE="1" uops_MS="0" uops_retire_slots="1"/>
      </architecture>
      <architecture name="BDW">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1"/>
      </architecture>
      <architecture name="SKL">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1"/>
      </architecture>
      <architecture name="SKX">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1"/>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1"/>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1"/>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1"/>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1"/>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1"/>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1"/>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.33" TP_unrolled="0.33" uops="1"/>
      </architecture>
    </instruction>
  </extension>
  <extension name="ADOX_ADCX">
    <instruction asm="ADCX" category="ADOX_ADCX" cpl="3" extension="ADOX_ADCX" iclass="ADCX" iform="ADCX_GPR32d_GPR32d" isa-set="ADOX_ADCX" string="ADCX (R32, R32)" summary="Unsigned Integer Addition of Two Operands with Carry Flag" url="uops.info/html-instr/ADCX_R32_R32.html" url-ref="felixcloutier.com/x86/ADCX.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="32" xtype="i32">EAX,ECX,EDX,EBX,ESP,EBP,ESI,EDI,R8D,R9D,R10D,R11D,R12D,R13D,R14D,R15D</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="32" xtype="i32">EAX,ECX,EDX,EBX,ESP,EBP,ESI,EDI,R8D,R9D,R10D,R11D,R12D,R13D,R14D,R15D</operand>
      <operand flag_CF="r/w" idx="3" name="REG2" r="1" suppressed="1" type="flags" w="1"/>
      <architecture name="BDW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p06" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p06" uops="1" version="2.3"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p06" uops="1" version="3.0"/>
        <measurement TP_loop="0.55" TP_ports="0.50" TP_unrolled="0.56" ports="1*p06" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p06" uops="1" version="2.3"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p06" uops="1" version="3.0"/>
        <measurement TP_loop="0.54" TP_ports="0.50" TP_unrolled="0.56" ports="1*p06" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p06" uops="1" version="2.3"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p06" uops="1" version="3.0"/>
        <measurement TP_loop="0.54" TP_ports="0.50" TP_unrolled="0.56" ports="1*p06" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.54" TP_ports="0.50" TP_unrolled="0.56" ports="1*p06" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.54" TP_ports="0.50" TP_unrolled="0.56" ports="1*p06" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.54" TP_ports="0.50" TP_unrolled="0.56" ports="1*p06" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.54" TP_ports="0.50" TP_unrolled="0.56" ports="1*p06" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.55" TP_ports="0.50" TP_unrolled="0.56" ports="1*p06" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
        <doc latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.56" TP_ports="0.50" TP_unrolled="0.56" ports="1*p06" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.56" TP_ports="0.50" TP_unrolled="0.56" ports="1*p06" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.67" TP_unrolled="0.56" uops="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="ALU" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.58" TP_unrolled="0.56" uops="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="ALU0/ALU3" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_unrolled="0.56" uops="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
        <doc TP="0.33" latency="1" ports="ALU0/1/2" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="ADCX" category="ADOX_ADCX" cpl="3" extension="ADOX_ADCX" iclass="ADCX" iform="ADCX_GPR32d_MEMd" isa-set="ADOX_ADCX" string="ADCX (R32, M32)" summary="Unsigned Integer Addition of Two Operands with Carry Flag" url="uops.info/html-instr/ADCX_R32_M32.html" url-ref="felixcloutier.com/x86/ADCX.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="32" xtype="i32">EAX,ECX,EDX,EBX,ESP,EBP,ESI,EDI,R8D,R9D,R10D,R11D,R12D,R13D,R14D,R15D</operand>
      <operand idx="2" memory-prefix="dword ptr" name="MEM0" r="1" type="mem" width="32" xtype="i32"/>
      <operand flag_CF="r/w" idx="3" name="REG1" r="1" suppressed="1" type="flags" w="1"/>
      <architecture name="BDW">
        <IACA TP="0.50" TP_indexed="0.75" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.75" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.49" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.60" TP_loop_indexed="1.00" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.56" TP_unrolled_indexed="1.00" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.75" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.49" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.56" TP_loop_indexed="1.00" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.56" TP_unrolled_indexed="1.00" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.75" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.49" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.55" TP_loop_indexed="1.00" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.56" TP_unrolled_indexed="1.00" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.56" TP_loop_indexed="1.00" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.56" TP_unrolled_indexed="1.00" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.56" TP_loop_indexed="1.00" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.56" TP_unrolled_indexed="1.00" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.56" TP_loop_indexed="1.00" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.56" TP_unrolled_indexed="1.00" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.55" TP_loop_indexed="1.00" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.56" TP_unrolled_indexed="1.00" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.56" TP_loop_indexed="0.67" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.56" TP_unrolled_indexed="0.67" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
        <doc latency="6.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.56" TP_loop_indexed="0.67" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.56" TP_unrolled_indexed="0.67" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.56" TP_loop_indexed="0.67" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.56" TP_unrolled_indexed="0.67" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.70" TP_unrolled="0.56" uops="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles_addr="5" cycles_addr_index="5" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="5" cycles_addr_index="5" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="ALU" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.57" TP_unrolled="0.56" uops="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles_addr="5" cycles_addr_index="5" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="5" cycles_addr_index="5" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="ALU0/ALU3" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_unrolled="0.56" uops="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles_addr="5" cycles_addr_index="5" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="5" cycles_addr_index="5" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
        <doc TP="0.33" latency="1" ports="ALU0/1/2" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="ADCX" category="ADOX_ADCX" cpl="3" extension="ADOX_ADCX" iclass="ADCX" iform="ADCX_GPR64q_GPR64q" isa-set="ADOX_ADCX" string="ADCX (R64, R64)" summary="Unsigned Integer Addition of Two Operands with Carry Flag" url="uops.info/html-instr/ADCX_R64_R64.html" url-ref="felixcloutier.com/x86/ADCX.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">RAX,RCX,RDX,RBX,RSP,RBP,RSI,RDI,R8,R9,R10,R11,R12,R13,R14,R15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="i64">RAX,RCX,RDX,RBX,RSP,RBP,RSI,RDI,R8,R9,R10,R11,R12,R13,R14,R15</operand>
      <operand flag_CF="r/w" idx="3" name="REG2" r="1" suppressed="1" type="flags" w="1"/>
      <architecture name="BDW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p06" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p06" uops="1" version="2.3"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p06" uops="1" version="3.0"/>
        <measurement TP_loop="0.55" TP_ports="0.50" TP_unrolled="0.56" ports="1*p06" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p06" uops="1" version="2.3"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p06" uops="1" version="3.0"/>
        <measurement TP_loop="0.54" TP_ports="0.50" TP_unrolled="0.56" ports="1*p06" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p06" uops="1" version="2.3"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p06" uops="1" version="3.0"/>
        <measurement TP_loop="0.54" TP_ports="0.50" TP_unrolled="0.56" ports="1*p06" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.54" TP_ports="0.50" TP_unrolled="0.56" ports="1*p06" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.54" TP_ports="0.50" TP_unrolled="0.56" ports="1*p06" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.54" TP_ports="0.50" TP_unrolled="0.56" ports="1*p06" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.54" TP_ports="0.50" TP_unrolled="0.56" ports="1*p06" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.56" TP_ports="0.50" TP_unrolled="0.56" ports="1*p06" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
        <doc latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.56" TP_ports="0.50" TP_unrolled="0.56" ports="1*p06" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.56" TP_ports="0.50" TP_unrolled="0.56" ports="1*p06" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.67" TP_unrolled="0.56" uops="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="ALU" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.58" TP_unrolled="0.56" uops="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="ALU0/ALU3" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_unrolled="0.56" uops="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
        <doc TP="0.33" latency="1" ports="ALU0/1/2" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="ADCX" category="ADOX_ADCX" cpl="3" extension="ADOX_ADCX" iclass="ADCX" iform="ADCX_GPR64q_MEMq" isa-set="ADOX_ADCX" string="ADCX (R64, M64)" summary="Unsigned Integer Addition of Two Operands with Carry Flag" url="uops.info/html-instr/ADCX_R64_M64.html" url-ref="felixcloutier.com/x86/ADCX.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">RAX,RCX,RDX,RBX,RSP,RBP,RSI,RDI,R8,R9,R10,R11,R12,R13,R14,R15</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="i64"/>
      <operand flag_CF="r/w" idx="3" name="REG1" r="1" suppressed="1" type="flags" w="1"/>
      <architecture name="BDW">
        <IACA TP="0.50" TP_indexed="0.75" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.75" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.49" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.60" TP_loop_indexed="1.00" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.56" TP_unrolled_indexed="1.00" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.75" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.49" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.56" TP_loop_indexed="1.00" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.56" TP_unrolled_indexed="1.00" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.75" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.49" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.56" TP_loop_indexed="1.00" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.56" TP_unrolled_indexed="1.00" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.56" TP_loop_indexed="1.00" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.56" TP_unrolled_indexed="1.00" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.56" TP_loop_indexed="1.00" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.56" TP_unrolled_indexed="1.00" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.56" TP_loop_indexed="1.00" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.56" TP_unrolled_indexed="1.00" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.55" TP_loop_indexed="1.00" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.56" TP_unrolled_indexed="1.00" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.56" TP_loop_indexed="0.67" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.56" TP_unrolled_indexed="0.67" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
        <doc latency="6.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.56" TP_loop_indexed="0.67" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.56" TP_unrolled_indexed="0.67" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.56" TP_loop_indexed="0.67" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.56" TP_unrolled_indexed="0.67" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.70" TP_unrolled="0.56" uops="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles_addr="5" cycles_addr_index="5" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="5" cycles_addr_index="5" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="ALU" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.57" TP_unrolled="0.56" uops="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles_addr="5" cycles_addr_index="5" cycles_mem="1" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="5" cycles_addr_index="5" cycles_mem="1" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="ALU0/ALU3" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_unrolled="0.56" uops="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles_addr="5" cycles_addr_index="5" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="5" cycles_addr_index="5" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
        <doc TP="0.33" latency="1" ports="ALU0/1/2" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="ADOX" category="ADOX_ADCX" cpl="3" extension="ADOX_ADCX" iclass="ADOX" iform="ADOX_GPR32d_GPR32d" isa-set="ADOX_ADCX" string="ADOX (R32, R32)" summary="Unsigned Integer Addition of Two Operands with Overflow Flag" url="uops.info/html-instr/ADOX_R32_R32.html" url-ref="felixcloutier.com/x86/ADOX.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="32" xtype="i32">EAX,ECX,EDX,EBX,ESP,EBP,ESI,EDI,R8D,R9D,R10D,R11D,R12D,R13D,R14D,R15D</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="32" xtype="i32">EAX,ECX,EDX,EBX,ESP,EBP,ESI,EDI,R8D,R9D,R10D,R11D,R12D,R13D,R14D,R15D</operand>
      <operand flag_OF="r/w" idx="3" name="REG2" r="1" suppressed="1" type="flags" w="1"/>
      <architecture name="BDW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p06" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p06" uops="1" version="2.3"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p06" uops="1" version="3.0"/>
        <measurement TP_loop="0.55" TP_ports="0.50" TP_unrolled="0.56" ports="1*p06" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p06" uops="1" version="2.3"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p06" uops="1" version="3.0"/>
        <measurement TP_loop="0.54" TP_ports="0.50" TP_unrolled="0.56" ports="1*p06" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p06" uops="1" version="2.3"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p06" uops="1" version="3.0"/>
        <measurement TP_loop="0.54" TP_ports="0.50" TP_unrolled="0.56" ports="1*p06" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.54" TP_ports="0.50" TP_unrolled="0.56" ports="1*p06" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.54" TP_ports="0.50" TP_unrolled="0.56" ports="1*p06" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.54" TP_ports="0.50" TP_unrolled="0.56" ports="1*p06" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.54" TP_ports="0.50" TP_unrolled="0.56" ports="1*p06" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.55" TP_ports="0.50" TP_unrolled="0.56" ports="1*p06" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
        <doc latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.56" TP_ports="0.50" TP_unrolled="0.56" ports="1*p06" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.56" TP_ports="0.50" TP_unrolled="0.56" ports="1*p06" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.44" TP_unrolled="0.56" uops="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="ALU" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.58" TP_unrolled="0.56" uops="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="ALU0/ALU3" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.33" TP_unrolled="0.56" uops="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
        <doc TP="0.33" latency="1" ports="ALU0/1/2" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="ADOX" category="ADOX_ADCX" cpl="3" extension="ADOX_ADCX" iclass="ADOX" iform="ADOX_GPR32d_MEMd" isa-set="ADOX_ADCX" string="ADOX (R32, M32)" summary="Unsigned Integer Addition of Two Operands with Overflow Flag" url="uops.info/html-instr/ADOX_R32_M32.html" url-ref="felixcloutier.com/x86/ADOX.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="32" xtype="i32">EAX,ECX,EDX,EBX,ESP,EBP,ESI,EDI,R8D,R9D,R10D,R11D,R12D,R13D,R14D,R15D</operand>
      <operand idx="2" memory-prefix="dword ptr" name="MEM0" r="1" type="mem" width="32" xtype="i32"/>
      <operand flag_OF="r/w" idx="3" name="REG1" r="1" suppressed="1" type="flags" w="1"/>
      <architecture name="BDW">
        <IACA TP="0.50" TP_indexed="0.75" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.75" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.49" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.57" TP_loop_indexed="0.60" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.56" TP_unrolled_indexed="1.00" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.75" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.49" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.56" TP_loop_indexed="0.58" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.56" TP_unrolled_indexed="1.00" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.75" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.49" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.56" TP_loop_indexed="0.58" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.56" TP_unrolled_indexed="1.00" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.56" TP_loop_indexed="0.58" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.56" TP_unrolled_indexed="1.00" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.56" TP_loop_indexed="0.57" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.56" TP_unrolled_indexed="1.00" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.56" TP_loop_indexed="0.58" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.56" TP_unrolled_indexed="1.00" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.56" TP_loop_indexed="0.58" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.56" TP_unrolled_indexed="1.00" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.56" TP_loop_indexed="0.60" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.56" TP_unrolled_indexed="0.67" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
        <doc latency="6.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.56" TP_loop_indexed="0.60" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.56" TP_unrolled_indexed="0.67" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.56" TP_loop_indexed="0.60" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.56" TP_unrolled_indexed="0.67" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.56" uops="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles_addr="5" cycles_addr_index="5" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="5" cycles_addr_index="5" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="ALU" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.57" TP_unrolled="0.56" uops="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles_addr="5" cycles_addr_index="5" cycles_mem="1" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="5" cycles_addr_index="5" cycles_mem="1" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="ALU0/ALU3" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.38" TP_unrolled="0.56" uops="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles_addr="5" cycles_addr_index="5" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="5" cycles_addr_index="5" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
        <doc TP="0.33" latency="1" ports="ALU0/1/2" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="ADOX" category="ADOX_ADCX" cpl="3" extension="ADOX_ADCX" iclass="ADOX" iform="ADOX_GPR64q_GPR64q" isa-set="ADOX_ADCX" string="ADOX (R64, R64)" summary="Unsigned Integer Addition of Two Operands with Overflow Flag" url="uops.info/html-instr/ADOX_R64_R64.html" url-ref="felixcloutier.com/x86/ADOX.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">RAX,RCX,RDX,RBX,RSP,RBP,RSI,RDI,R8,R9,R10,R11,R12,R13,R14,R15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="i64">RAX,RCX,RDX,RBX,RSP,RBP,RSI,RDI,R8,R9,R10,R11,R12,R13,R14,R15</operand>
      <operand flag_OF="r/w" idx="3" name="REG2" r="1" suppressed="1" type="flags" w="1"/>
      <architecture name="BDW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p06" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p06" uops="1" version="2.3"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p06" uops="1" version="3.0"/>
        <measurement TP_loop="0.55" TP_ports="0.50" TP_unrolled="0.56" ports="1*p06" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p06" uops="1" version="2.3"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p06" uops="1" version="3.0"/>
        <measurement TP_loop="0.54" TP_ports="0.50" TP_unrolled="0.56" ports="1*p06" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p06" uops="1" version="2.3"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p06" uops="1" version="3.0"/>
        <measurement TP_loop="0.54" TP_ports="0.50" TP_unrolled="0.56" ports="1*p06" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.54" TP_ports="0.50" TP_unrolled="0.56" ports="1*p06" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.54" TP_ports="0.50" TP_unrolled="0.56" ports="1*p06" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.54" TP_ports="0.50" TP_unrolled="0.56" ports="1*p06" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.54" TP_ports="0.50" TP_unrolled="0.56" ports="1*p06" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.55" TP_ports="0.50" TP_unrolled="0.56" ports="1*p06" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
        <doc latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.56" TP_ports="0.50" TP_unrolled="0.56" ports="1*p06" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.56" TP_ports="0.50" TP_unrolled="0.56" ports="1*p06" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.44" TP_unrolled="0.56" uops="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="ALU" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.58" TP_unrolled="0.56" uops="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="ALU0/ALU3" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.33" TP_unrolled="0.56" uops="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
        <doc TP="0.33" latency="1" ports="ALU0/1/2" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="ADOX" category="ADOX_ADCX" cpl="3" extension="ADOX_ADCX" iclass="ADOX" iform="ADOX_GPR64q_MEMq" isa-set="ADOX_ADCX" string="ADOX (R64, M64)" summary="Unsigned Integer Addition of Two Operands with Overflow Flag" url="uops.info/html-instr/ADOX_R64_M64.html" url-ref="felixcloutier.com/x86/ADOX.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="64" xtype="i64">RAX,RCX,RDX,RBX,RSP,RBP,RSI,RDI,R8,R9,R10,R11,R12,R13,R14,R15</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="i64"/>
      <operand flag_OF="r/w" idx="3" name="REG1" r="1" suppressed="1" type="flags" w="1"/>
      <architecture name="BDW">
        <IACA TP="0.50" TP_indexed="0.75" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.75" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.49" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.57" TP_loop_indexed="0.60" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.56" TP_unrolled_indexed="1.00" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.75" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.49" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.56" TP_loop_indexed="0.58" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.56" TP_unrolled_indexed="1.00" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.75" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.49" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.56" TP_loop_indexed="0.58" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.56" TP_unrolled_indexed="1.00" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.56" TP_loop_indexed="0.56" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.56" TP_unrolled_indexed="1.00" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.56" TP_loop_indexed="0.56" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.56" TP_unrolled_indexed="1.00" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.56" TP_loop_indexed="0.60" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.56" TP_unrolled_indexed="1.00" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.56" TP_loop_indexed="0.58" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.56" TP_unrolled_indexed="1.00" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="3" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.56" TP_loop_indexed="0.60" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.56" TP_unrolled_indexed="0.67" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
        <doc latency="6.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.56" TP_loop_indexed="0.60" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.56" TP_unrolled_indexed="0.67" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.56" TP_loop_indexed="0.60" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.56" TP_unrolled_indexed="0.67" ports="1*p06+1*p23" ports_indexed="1*p06+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="6" cycles_addr_index="6" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.56" uops="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles_addr="5" cycles_addr_index="5" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="5" cycles_addr_index="5" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="ALU" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.57" TP_unrolled="0.56" uops="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles_addr="5" cycles_addr_index="5" cycles_mem="1" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="5" cycles_addr_index="5" cycles_mem="1" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="ALU0/ALU3" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.38" TP_unrolled="0.56" uops="1">
          <latency cycles="1" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="1" target_op="3"/>
          <latency cycles_addr="5" cycles_addr_index="5" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="5" cycles_addr_index="5" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="3"/>
        </measurement>
        <doc TP="0.33" latency="1" ports="ALU0/1/2" uops="1"/>
      </architecture>
    </instruction>
  </extension>
  <extension name="AES">
    <instruction asm="AESDEC" category="AES" cpl="3" extension="AES" iclass="AESDEC" iform="AESDEC_XMMdq_XMMdq" isa-set="AES" string="AESDEC (XMM, XMM)" summary="Perform One Round of an AES Decryption Flow" url="uops.info/html-instr/AESDEC_XMM_XMM.html" url-ref="felixcloutier.com/x86/AESDEC.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="WSM">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="6" ports="2*p0+1*p5" uops="3" version="2.1"/>
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="2*p0+1*p5" uops="3" version="2.2"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" complex_decoder="1" ports="2*p0+1*p05" uops="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="1" target_op="1"/>
          <latency cycles="6" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="7" ports="1*p015+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p015+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p015+1*p5" uops="2" version="2.3"/>
        <measurement TP_loop="1.14" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p015+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="8" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="7" ports="1*p015+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p015+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p015+1*p5" uops="2" version="2.3"/>
        <measurement TP_loop="1.14" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p015+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="8" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="7" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="7" start_op="1" target_op="1"/>
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="7" start_op="1" target_op="1"/>
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="1" target_op="1"/>
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="1" target_op="1"/>
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="1" target_op="1"/>
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="4" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="4" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="4" ports="FP0/1" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="AESDEC" category="AES" cpl="3" extension="AES" iclass="AESDEC" iform="AESDEC_XMMdq_MEMdq" isa-set="AES" string="AESDEC (XMM, M128)" summary="Perform One Round of an AES Decryption Flow" url="uops.info/html-instr/AESDEC_XMM_M128.html" url-ref="felixcloutier.com/x86/AESDEC.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i32"/>
      <architecture name="WSM">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" latency="12" ports="2*p0+1*p2+1*p5" ports_indexed="2*p0+1*p2+1*p5" uops="4" uops_indexed="4" version="2.1"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="2*p0+1*p2+1*p5" ports_indexed="2*p0+1*p2+1*p5" uops="4" uops_indexed="4" version="2.2"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" complex_decoder="1" ports="2*p0+1*p2+1*p5" uops="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="1" target_op="1"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="13" ports="1*p015+1*p23+1*p5" ports_indexed="1*p015+1*p23+1*p5" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p015+1*p23+1*p5" ports_indexed="1*p015+1*p23+1*p5" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p015+1*p23+1*p5" ports_indexed="1*p015+1*p23+1*p5" uops="3" uops_indexed="3" version="2.3"/>
        <measurement TP_loop="1.20" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p015+1*p23+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="8" start_op="1" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="13" ports="1*p015+1*p23+1*p5" ports_indexed="1*p015+1*p23+1*p5" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p015+1*p23+1*p5" ports_indexed="1*p015+1*p23+1*p5" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p015+1*p23+1*p5" ports_indexed="1*p015+1*p23+1*p5" uops="3" uops_indexed="3" version="2.3"/>
        <measurement TP_loop="1.33" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p015+1*p23+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="8" start_op="1" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="13" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="1" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="1" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.53" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="1" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="1" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="1" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="AESDECLAST" category="AES" cpl="3" extension="AES" iclass="AESDECLAST" iform="AESDECLAST_XMMdq_XMMdq" isa-set="AES" string="AESDECLAST (XMM, XMM)" summary="Perform Last Round of an AES Decryption Flow" url="uops.info/html-instr/AESDECLAST_XMM_XMM.html" url-ref="felixcloutier.com/x86/AESDECLAST.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="WSM">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="6" ports="2*p0+1*p5" uops="3" version="2.1"/>
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="2*p0+1*p5" uops="3" version="2.2"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" complex_decoder="1" ports="2*p0+1*p05" uops="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="1" target_op="1"/>
          <latency cycles="6" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="7" ports="1*p015+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p015+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p015+1*p5" uops="2" version="2.3"/>
        <measurement TP_loop="1.13" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p015+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="8" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="7" ports="1*p015+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p015+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p015+1*p5" uops="2" version="2.3"/>
        <measurement TP_loop="1.14" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p015+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="8" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="7" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="7" start_op="1" target_op="1"/>
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="7" start_op="1" target_op="1"/>
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="1" target_op="1"/>
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="1" target_op="1"/>
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="1" target_op="1"/>
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="4" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="4" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="4" ports="FP0/1" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="AESDECLAST" category="AES" cpl="3" extension="AES" iclass="AESDECLAST" iform="AESDECLAST_XMMdq_MEMdq" isa-set="AES" string="AESDECLAST (XMM, M128)" summary="Perform Last Round of an AES Decryption Flow" url="uops.info/html-instr/AESDECLAST_XMM_M128.html" url-ref="felixcloutier.com/x86/AESDECLAST.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i32"/>
      <architecture name="WSM">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" latency="12" ports="2*p0+1*p2+1*p5" ports_indexed="2*p0+1*p2+1*p5" uops="4" uops_indexed="4" version="2.1"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="2*p0+1*p2+1*p5" ports_indexed="2*p0+1*p2+1*p5" uops="4" uops_indexed="4" version="2.2"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" complex_decoder="1" ports="2*p0+1*p2+1*p5" uops="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="1" target_op="1"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="13" ports="1*p015+1*p23+1*p5" ports_indexed="1*p015+1*p23+1*p5" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p015+1*p23+1*p5" ports_indexed="1*p015+1*p23+1*p5" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p015+1*p23+1*p5" ports_indexed="1*p015+1*p23+1*p5" uops="3" uops_indexed="3" version="2.3"/>
        <measurement TP_loop="1.27" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p015+1*p23+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="8" start_op="1" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="13" ports="1*p015+1*p23+1*p5" ports_indexed="1*p015+1*p23+1*p5" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p015+1*p23+1*p5" ports_indexed="1*p015+1*p23+1*p5" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p015+1*p23+1*p5" ports_indexed="1*p015+1*p23+1*p5" uops="3" uops_indexed="3" version="2.3"/>
        <measurement TP_loop="1.29" TP_ports="1.00" TP_unrolled="0.98" available_simple_decoders="0" complex_decoder="1" ports="1*p015+1*p23+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="8" start_op="1" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="13" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="1" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="1" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.53" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="1" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="1" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="1" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="AESENC" category="AES" cpl="3" extension="AES" iclass="AESENC" iform="AESENC_XMMdq_XMMdq" isa-set="AES" string="AESENC (XMM, XMM)" summary="Perform One Round of an AES Encryption Flow" url="uops.info/html-instr/AESENC_XMM_XMM.html" url-ref="felixcloutier.com/x86/AESENC.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="WSM">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="6" ports="2*p0+1*p5" uops="3" version="2.1"/>
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="2*p0+1*p5" uops="3" version="2.2"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" complex_decoder="1" ports="2*p0+1*p05" uops="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="1" target_op="1"/>
          <latency cycles="6" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="7" ports="1*p015+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p015+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p015+1*p5" uops="2" version="2.3"/>
        <measurement TP_loop="1.15" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p015+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="8" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="7" ports="1*p015+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p015+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p015+1*p5" uops="2" version="2.3"/>
        <measurement TP_loop="1.13" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p015+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="8" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="7" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="7" start_op="1" target_op="1"/>
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="7" start_op="1" target_op="1"/>
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="1" target_op="1"/>
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="1" target_op="1"/>
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="1" target_op="1"/>
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="4" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="4" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="4" ports="FP0/1" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="AESENC" category="AES" cpl="3" extension="AES" iclass="AESENC" iform="AESENC_XMMdq_MEMdq" isa-set="AES" string="AESENC (XMM, M128)" summary="Perform One Round of an AES Encryption Flow" url="uops.info/html-instr/AESENC_XMM_M128.html" url-ref="felixcloutier.com/x86/AESENC.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i32"/>
      <architecture name="WSM">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" latency="12" ports="2*p0+1*p2+1*p5" ports_indexed="2*p0+1*p2+1*p5" uops="4" uops_indexed="4" version="2.1"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="2*p0+1*p2+1*p5" ports_indexed="2*p0+1*p2+1*p5" uops="4" uops_indexed="4" version="2.2"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" complex_decoder="1" ports="2*p0+1*p2+1*p5" uops="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="1" target_op="1"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="13" ports="1*p015+1*p23+1*p5" ports_indexed="1*p015+1*p23+1*p5" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p015+1*p23+1*p5" ports_indexed="1*p015+1*p23+1*p5" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p015+1*p23+1*p5" ports_indexed="1*p015+1*p23+1*p5" uops="3" uops_indexed="3" version="2.3"/>
        <measurement TP_loop="1.22" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p015+1*p23+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="8" start_op="1" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="13" ports="1*p015+1*p23+1*p5" ports_indexed="1*p015+1*p23+1*p5" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p015+1*p23+1*p5" ports_indexed="1*p015+1*p23+1*p5" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p015+1*p23+1*p5" ports_indexed="1*p015+1*p23+1*p5" uops="3" uops_indexed="3" version="2.3"/>
        <measurement TP_loop="1.29" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p015+1*p23+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="8" start_op="1" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="13" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="1" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="1" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.53" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="1" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="1" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="1" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="AESENCLAST" category="AES" cpl="3" extension="AES" iclass="AESENCLAST" iform="AESENCLAST_XMMdq_XMMdq" isa-set="AES" string="AESENCLAST (XMM, XMM)" summary="Perform Last Round of an AES Encryption Flow" url="uops.info/html-instr/AESENCLAST_XMM_XMM.html" url-ref="felixcloutier.com/x86/AESENCLAST.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="WSM">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="6" ports="2*p0+1*p5" uops="3" version="2.1"/>
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="2*p0+1*p5" uops="3" version="2.2"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" complex_decoder="1" ports="2*p0+1*p05" uops="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="1" target_op="1"/>
          <latency cycles="6" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="7" ports="1*p015+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p015+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p015+1*p5" uops="2" version="2.3"/>
        <measurement TP_loop="1.14" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p015+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="8" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="7" ports="1*p015+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p015+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p015+1*p5" uops="2" version="2.3"/>
        <measurement TP_loop="1.13" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p015+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="8" start_op="1" target_op="1"/>
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="7" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="7" start_op="1" target_op="1"/>
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="7" start_op="1" target_op="1"/>
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="1" target_op="1"/>
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="1" target_op="1"/>
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="1" target_op="1"/>
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="4" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="4" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="4" ports="FP0/1" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="AESENCLAST" category="AES" cpl="3" extension="AES" iclass="AESENCLAST" iform="AESENCLAST_XMMdq_MEMdq" isa-set="AES" string="AESENCLAST (XMM, M128)" summary="Perform Last Round of an AES Encryption Flow" url="uops.info/html-instr/AESENCLAST_XMM_M128.html" url-ref="felixcloutier.com/x86/AESENCLAST.html">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i32"/>
      <architecture name="WSM">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" latency="12" ports="2*p0+1*p2+1*p5" ports_indexed="2*p0+1*p2+1*p5" uops="4" uops_indexed="4" version="2.1"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="2*p0+1*p2+1*p5" ports_indexed="2*p0+1*p2+1*p5" uops="4" uops_indexed="4" version="2.2"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" complex_decoder="1" ports="2*p0+1*p2+1*p5" uops="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="1" target_op="1"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="13" ports="1*p015+1*p23+1*p5" ports_indexed="1*p015+1*p23+1*p5" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p015+1*p23+1*p5" ports_indexed="1*p015+1*p23+1*p5" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p015+1*p23+1*p5" ports_indexed="1*p015+1*p23+1*p5" uops="3" uops_indexed="3" version="2.3"/>
        <measurement TP_loop="1.27" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p015+1*p23+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="8" start_op="1" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="13" ports="1*p015+1*p23+1*p5" ports_indexed="1*p015+1*p23+1*p5" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p015+1*p23+1*p5" ports_indexed="1*p015+1*p23+1*p5" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p015+1*p23+1*p5" ports_indexed="1*p015+1*p23+1*p5" uops="3" uops_indexed="3" version="2.3"/>
        <measurement TP_loop="1.33" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p015+1*p23+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="8" start_op="1" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="13" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="1" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="1" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.53" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="1" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="1" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="1" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="4" start_op="1" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="AESIMC" category="AES" cpl="3" extension="AES" iclass="AESIMC" iform="AESIMC_XMMdq_XMMdq" isa-set="AES" string="AESIMC (XMM, XMM)" summary="Perform the AES InvMixColumn Transformation" url="uops.info/html-instr/AESIMC_XMM_XMM.html" url-ref="felixcloutier.com/x86/AESIMC.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="WSM">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="6" ports="2*p0+1*p5" uops="3" version="2.1"/>
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="2*p0+1*p5" uops="3" version="2.2"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" complex_decoder="1" ports="2*p0+1*p05" uops="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SNB">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="12" ports="2*p5" uops="2" version="2.1"/>
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="2*p5" uops="2" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="2*p5" uops="2" version="2.3"/>
        <measurement TP_loop="2.07" TP_ports="2.00" TP_unrolled="2.02" available_simple_decoders="0" complex_decoder="1" ports="2*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="14" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="12" ports="2*p5" uops="2" version="2.1"/>
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="2*p5" uops="2" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="2*p5" uops="2" version="2.3"/>
        <measurement TP_loop="2.07" TP_ports="2.00" TP_unrolled="2.02" available_simple_decoders="0" complex_decoder="1" ports="2*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="14" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="14" ports="2*p5" uops="2" version="2.1"/>
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="2*p5" uops="2" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="2*p5" uops="2" version="2.3"/>
        <IACA TP="1.96" TP_ports="2.00" ports="2*p5" uops="2" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="2*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="14" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="2*p5" uops="2" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="2*p5" uops="2" version="2.3"/>
        <IACA TP="1.96" TP_ports="2.00" ports="2*p5" uops="2" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="2*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="14" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="2.00" TP_ports="2.00" ports="2*p0" uops="2" version="2.3"/>
        <IACA TP="1.96" TP_ports="2.00" ports="2*p0" uops="2" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="3" complex_decoder="1" ports="2*p0" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="8" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="2.00" TP_ports="2.00" ports="2*p0" uops="2" version="2.3"/>
        <IACA TP="1.96" TP_ports="2.00" ports="2*p0" uops="2" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="3" complex_decoder="1" ports="2*p0" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="8" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="3" complex_decoder="1" ports="2*p0" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="8" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="3" complex_decoder="1" ports="2*p0" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="8" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.05" available_simple_decoders="3" complex_decoder="1" ports="2*p01" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="9" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="3" complex_decoder="1" ports="2*p0" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="8" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="2*p01" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="8" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="2*p01" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="8" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="2*p01" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="8" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="4" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="4" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="4" ports="FP0/1" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="AESIMC" category="AES" cpl="3" extension="AES" iclass="AESIMC" iform="AESIMC_XMMdq_MEMdq" isa-set="AES" string="AESIMC (XMM, M128)" summary="Perform the AES InvMixColumn Transformation" url="uops.info/html-instr/AESIMC_XMM_M128.html" url-ref="felixcloutier.com/x86/AESIMC.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i32"/>
      <architecture name="WSM">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" latency="12" ports="2*p0+1*p2+1*p5" ports_indexed="2*p0+1*p2+1*p5" uops="4" uops_indexed="4" version="2.1"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="2*p0+1*p2+1*p5" ports_indexed="2*p0+1*p2+1*p5" uops="4" uops_indexed="4" version="2.2"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" complex_decoder="1" ports="2*p0+1*p2+1*p5" uops="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SNB">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" latency="18" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="2.3"/>
        <measurement TP_loop="2.06" TP_ports="2.00" TP_unrolled="2.02" available_simple_decoders="0" complex_decoder="1" ports="1*p23+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles_addr="20" cycles_addr_index="20" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="19" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" latency="18" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="2.3"/>
        <measurement TP_loop="2.07" TP_ports="2.00" TP_unrolled="2.02" available_simple_decoders="0" complex_decoder="1" ports="1*p23+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles_addr="20" cycles_addr_index="20" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="19" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" latency="20" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p23+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles_addr="20" cycles_addr_index="20" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="19" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p23+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles_addr="20" cycles_addr_index="20" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="19" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="2*p0+1*p23" ports_indexed="2*p0+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="2*p0+1*p23" ports_indexed="2*p0+1*p23" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="2*p0+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles_addr="15" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="2*p0+1*p23" ports_indexed="2*p0+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="2*p0+1*p23" ports_indexed="2*p0+1*p23" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="2*p0+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles_addr="15" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="2*p0+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles_addr="15" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="2*p0+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles_addr="15" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p01+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles_addr="16" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="2*p0+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles_addr="15" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p01+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p01+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p01+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="AESKEYGENASSIST" category="AES" cpl="3" extension="AES" iclass="AESKEYGENASSIST" iform="AESKEYGENASSIST_XMMdq_XMMdq_IMMb" isa-set="AES" string="AESKEYGENASSIST (XMM, XMM, I8)" summary="AES Round Key Generation Assist" url="uops.info/html-instr/AESKEYGENASSIST_XMM_XMM_I8.html" url-ref="felixcloutier.com/x86/AESKEYGENASSIST.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="WSM">
        <IACA TP="2.20" TP_no_interiteration="2.00" TP_ports="2.00" latency="12" ports="2*p0+1*p015+1*p2+1*p5" uops="5" version="2.1"/>
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="2*p0+1*p015+1*p2+1*p5" uops="5" version="2.2"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" complex_decoder="1" ports="2*p0+1*p015+1*p5" uops="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SNB">
        <measurement TP_loop="8.00" TP_ports="7.00" TP_unrolled="8.00" available_simple_decoders="0" complex_decoder="1" ports="2*p0+1*p015+1*p15+7*p5" uops="11" uops_MITE="3" uops_MS="8" uops_retire_slots="11">
          <latency cycles="10" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <measurement TP_loop="8.00" TP_ports="7.00" TP_unrolled="8.00" available_simple_decoders="0" complex_decoder="1" ports="2*p0+1*p015+1*p15+7*p5" uops="11" uops_MITE="3" uops_MS="8" uops_retire_slots="11">
          <latency cycles="10" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="7.00" TP_no_interiteration="7.00" TP_ports="7.00" ports="2*p0+2*p015+7*p5" uops="11" version="2.2"/>
        <IACA TP="7.00" TP_ports="7.00" ports="2*p0+2*p015+7*p5" uops="11" version="2.3"/>
        <IACA TP="6.68" TP_ports="7.00" ports="2*p0+2*p015+7*p5" uops="11" version="3.0"/>
        <measurement TP_loop="8.46" TP_ports="8.00" TP_unrolled="8.47" available_simple_decoders="0" complex_decoder="1" ports="2*p0+8*p5" uops="10" uops_MITE="3" uops_MS="7" uops_retire_slots="10">
          <latency cycles="9" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="7.00" TP_no_interiteration="7.00" TP_ports="7.00" ports="2*p0+2*p015+7*p5" uops="11" version="2.2"/>
        <IACA TP="7.00" TP_ports="7.00" ports="2*p0+2*p015+7*p5" uops="11" version="2.3"/>
        <IACA TP="6.67" TP_ports="7.00" ports="2*p0+2*p015+7*p5" uops="11" version="3.0"/>
        <measurement TP_loop="8.54" TP_ports="8.00" TP_unrolled="8.50" available_simple_decoders="0" complex_decoder="1" ports="2*p0+8*p5" uops="10" uops_MITE="3" uops_MS="7" uops_retire_slots="10">
          <latency cycles="9" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="6.00" TP_ports="6.00" ports="3*p0+2*p015+6*p5" uops="11" version="2.3"/>
        <IACA TP="5.71" TP_ports="6.00" ports="3*p0+2*p015+6*p5" uops="11" version="3.0"/>
        <measurement TP_loop="12.00" TP_ports="7.00" TP_unrolled="12.00" available_simple_decoders="0" complex_decoder="1" ports="5*p0+1*p06+7*p5" uops="13" uops_MITE="3" uops_MS="10" uops_retire_slots="13">
          <latency cycles="6" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="6.00" TP_ports="6.00" ports="3*p0+2*p015+6*p5" uops="11" version="2.3"/>
        <IACA TP="5.71" TP_ports="6.00" ports="3*p0+2*p015+6*p5" uops="11" version="3.0"/>
        <measurement TP_loop="12.00" TP_ports="7.00" TP_unrolled="12.00" available_simple_decoders="0" complex_decoder="1" ports="5*p0+1*p06+7*p5" uops="13" uops_MITE="3" uops_MS="10" uops_retire_slots="13">
          <latency cycles="6" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="12.00" TP_ports="7.00" TP_unrolled="12.00" available_simple_decoders="0" complex_decoder="1" ports="5*p0+1*p06+7*p5" uops="13" uops_MITE="3" uops_MS="10" uops_retire_slots="13">
          <latency cycles="6" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="12.00" TP_ports="7.00" TP_unrolled="12.00" available_simple_decoders="0" complex_decoder="1" ports="5*p0+1*p06+7*p5" uops="13" uops_MITE="3" uops_MS="10" uops_retire_slots="13">
          <latency cycles="6" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="12.00" TP_ports="6.00" TP_unrolled="12.00" available_simple_decoders="0" complex_decoder="1" ports="4*p0+1*p01+1*p015+1*p06+6*p5" uops="13" uops_MITE="3" uops_MS="10" uops_retire_slots="13">
          <latency cycles="6" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="12.00" TP_ports="7.00" TP_unrolled="12.00" available_simple_decoders="0" complex_decoder="1" ports="5*p0+1*p06+7*p5" uops="13" uops_MITE="3" uops_MS="10" uops_retire_slots="13">
          <latency cycles="6" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="12.00" TP_ports="4.00" TP_unrolled="12.00" available_simple_decoders="0" complex_decoder="1" ports="4*p0+1*p01+1*p015+1*p06+3*p15+3*p5" uops="13" uops_MITE="3" uops_MS="11" uops_retire_slots="14">
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="12.00" TP_ports="4.00" TP_unrolled="12.00" available_simple_decoders="0" complex_decoder="1" ports="4*p0+1*p01+1*p015+1*p06+3*p15+3*p5" uops="13" uops_MITE="3" uops_MS="11" uops_retire_slots="14">
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="12.00" TP_ports="4.00" TP_unrolled="12.00" available_simple_decoders="0" complex_decoder="1" ports="4*p0+1*p01+1*p015+1*p06+3*p15+3*p5" uops="13" uops_MITE="3" uops_MS="11" uops_retire_slots="14">
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="4" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="4" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="4" ports="FP0/1" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="AESKEYGENASSIST" category="AES" cpl="3" extension="AES" iclass="AESKEYGENASSIST" iform="AESKEYGENASSIST_XMMdq_MEMdq_IMMb" isa-set="AES" string="AESKEYGENASSIST (XMM, M128, I8)" summary="AES Round Key Generation Assist" url="uops.info/html-instr/AESKEYGENASSIST_XMM_M128_I8.html" url-ref="felixcloutier.com/x86/AESKEYGENASSIST.html">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i32"/>
      <operand idx="3" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="WSM">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="6" ports="2*p0+1*p015+1*p5" uops="4" version="2.1"/>
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="2*p0+1*p015+1*p5" uops="4" version="2.2"/>
        <measurement TP_loop="4.00" TP_ports="2.00" TP_unrolled="4.00" complex_decoder="1" ports="2*p0+1*p015+1*p2+1*p5" uops="5" uops_MS="1" uops_retire_slots="5">
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SNB">
        <measurement TP_loop="6.95" TP_ports="6.00" TP_unrolled="6.94" available_simple_decoders="0" complex_decoder="1" ports="2*p0+1*p015+1*p15+1*p23+6*p5" uops="11" uops_MITE="3" uops_MS="8" uops_retire_slots="11">
          <latency cycles_addr="15" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <measurement TP_loop="6.94" TP_ports="6.00" TP_unrolled="6.91" available_simple_decoders="0" complex_decoder="1" ports="2*p0+1*p015+1*p15+1*p23+6*p5" uops="11" uops_MITE="3" uops_MS="8" uops_retire_slots="11">
          <latency cycles_addr="15" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="7.00" TP_no_interiteration="7.00" TP_ports="7.00" ports="2*p0+1*p015+1*p23+7*p5" uops="11" version="2.2"/>
        <IACA TP="7.00" TP_ports="7.00" ports="2*p0+1*p015+1*p23+7*p5" uops="11" version="2.3"/>
        <IACA TP="6.90" TP_ports="7.00" ports="2*p0+1*p015+1*p23+7*p5" uops="11" version="3.0"/>
        <measurement TP_loop="7.78" TP_ports="7.00" TP_unrolled="7.83" available_simple_decoders="0" complex_decoder="1" ports="2*p0+1*p23+7*p5" uops="10" uops_MITE="3" uops_MS="7" uops_retire_slots="10">
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="7.00" TP_no_interiteration="7.00" TP_ports="7.00" ports="2*p0+1*p015+1*p23+7*p5" uops="11" version="2.2"/>
        <IACA TP="7.00" TP_ports="7.00" ports="2*p0+1*p015+1*p23+7*p5" uops="11" version="2.3"/>
        <IACA TP="6.89" TP_ports="7.00" ports="2*p0+1*p015+1*p23+7*p5" uops="11" version="3.0"/>
        <measurement TP_loop="7.80" TP_ports="7.00" TP_unrolled="7.89" available_simple_decoders="0" complex_decoder="1" ports="2*p0+1*p23+7*p5" uops="10" uops_MITE="3" uops_MS="7" uops_retire_slots="10">
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="6.00" TP_ports="6.00" ports="3*p0+1*p015+1*p23+6*p5" uops="11" version="2.3"/>
        <IACA TP="5.89" TP_ports="6.00" ports="3*p0+1*p015+1*p23+6*p5" uops="11" version="3.0"/>
        <measurement TP_loop="12.00" TP_ports="6.00" TP_unrolled="12.00" available_simple_decoders="0" complex_decoder="1" ports="5*p0+1*p06+1*p23+6*p5" uops="13" uops_MITE="3" uops_MS="10" uops_retire_slots="13">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="6.00" TP_ports="6.00" ports="3*p0+1*p015+1*p23+6*p5" uops="11" version="2.3"/>
        <IACA TP="5.89" TP_ports="6.00" ports="3*p0+1*p015+1*p23+6*p5" uops="11" version="3.0"/>
        <measurement TP_loop="12.00" TP_ports="6.00" TP_unrolled="12.00" available_simple_decoders="0" complex_decoder="1" ports="5*p0+1*p06+1*p23+6*p5" uops="13" uops_MITE="3" uops_MS="10" uops_retire_slots="13">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="12.00" TP_ports="6.00" TP_unrolled="12.00" available_simple_decoders="0" complex_decoder="1" ports="5*p0+1*p06+1*p23+6*p5" uops="13" uops_MITE="3" uops_MS="10" uops_retire_slots="13">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="12.00" TP_ports="6.00" TP_unrolled="12.00" available_simple_decoders="0" complex_decoder="1" ports="5*p0+1*p06+1*p23+6*p5" uops="13" uops_MITE="3" uops_MS="10" uops_retire_slots="13">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="12.00" TP_ports="6.00" TP_unrolled="12.00" available_simple_decoders="0" complex_decoder="1" ports="4*p0+1*p01+1*p06+1*p23+6*p5" uops="13" uops_MITE="3" uops_MS="10" uops_retire_slots="13">
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="12.00" TP_ports="6.00" TP_unrolled="12.00" available_simple_decoders="0" complex_decoder="1" ports="5*p0+1*p06+1*p23+6*p5" uops="13" uops_MITE="3" uops_MS="10" uops_retire_slots="13">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="12.00" TP_ports="4.00" TP_unrolled="12.00" available_simple_decoders="0" complex_decoder="1" ports="4*p0+1*p01+1*p06+3*p15+1*p23+3*p5" uops="13" uops_MITE="3" uops_MS="11" uops_retire_slots="14">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="12.00" TP_ports="4.00" TP_unrolled="12.00" available_simple_decoders="0" complex_decoder="1" ports="4*p0+1*p01+1*p06+3*p15+1*p23+3*p5" uops="13" uops_MITE="3" uops_MS="11" uops_retire_slots="14">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="12.00" TP_ports="4.00" TP_unrolled="12.00" available_simple_decoders="0" complex_decoder="1" ports="4*p0+1*p01+1*p06+3*p15+1*p23+3*p5" uops="13" uops_MITE="3" uops_MS="11" uops_retire_slots="14">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
  </extension>
  <extension name="AMD_INVLPGB">
    <instruction asm="INVLPGB" category="SYSTEM" cpl="0" extension="AMD_INVLPGB" iclass="INVLPGB" iform="INVLPGB_RAX_EDX_ECX" isa-set="AMD_INVLPGB" string="INVLPGB (RAX, EDX, ECX)" url="uops.info/html-instr/INVLPGB_RAX_EDX_ECX.html">
      <operand idx="1" implicit="1" name="REG0" r="1" type="reg">RAX</operand>
      <operand idx="2" implicit="1" name="REG1" r="1" type="reg">EDX</operand>
      <operand idx="3" implicit="1" name="REG2" r="1" type="reg">ECX</operand>
      <architecture name="ZEN3">
        <doc uops="ucode"/>
      </architecture>
    </instruction>
    <instruction asm="TLBSYNC" category="SYSTEM" cpl="0" extension="AMD_INVLPGB" iclass="TLBSYNC" iform="TLBSYNC" isa-set="AMD_INVLPGB" string="TLBSYNC" url="uops.info/html-instr/TLBSYNC.html">
      <architecture name="ZEN3">
        <doc uops="ucode"/>
      </architecture>
    </instruction>
  </extension>
  <extension name="AMX_BF16">
    <instruction asm="TDPBF16PS" category="AMX_TILE" cpl="3" extension="AMX_BF16" iclass="TDPBF16PS" iform="TDPBF16PS_TMMf32_TMMu32_TMMu32" isa-set="AMX_BF16" string="TDPBF16PS (MM, MM, MM)" url="uops.info/html-instr/TDPBF16PS_MM_MM_MM.html" vex="1">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="0" xtype="f32">TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="0" xtype="u32">TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="0" xtype="u32">TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7</operand>
    </instruction>
  </extension>
  <extension name="AMX_INT8">
    <instruction asm="TDPBSSD" category="AMX_TILE" cpl="3" extension="AMX_INT8" iclass="TDPBSSD" iform="TDPBSSD_TMMi32_TMMu32_TMMu32" isa-set="AMX_INT8" string="TDPBSSD (MM, MM, MM)" url="uops.info/html-instr/TDPBSSD_MM_MM_MM.html" vex="1">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="0" xtype="i32">TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="0" xtype="u32">TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="0" xtype="u32">TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7</operand>
    </instruction>
    <instruction asm="TDPBSUD" category="AMX_TILE" cpl="3" extension="AMX_INT8" iclass="TDPBSUD" iform="TDPBSUD_TMMi32_TMMu32_TMMu32" isa-set="AMX_INT8" string="TDPBSUD (MM, MM, MM)" url="uops.info/html-instr/TDPBSUD_MM_MM_MM.html" vex="1">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="0" xtype="i32">TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="0" xtype="u32">TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="0" xtype="u32">TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7</operand>
    </instruction>
    <instruction asm="TDPBUSD" category="AMX_TILE" cpl="3" extension="AMX_INT8" iclass="TDPBUSD" iform="TDPBUSD_TMMi32_TMMu32_TMMu32" isa-set="AMX_INT8" string="TDPBUSD (MM, MM, MM)" url="uops.info/html-instr/TDPBUSD_MM_MM_MM.html" vex="1">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="0" xtype="i32">TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="0" xtype="u32">TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="0" xtype="u32">TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7</operand>
    </instruction>
    <instruction asm="TDPBUUD" category="AMX_TILE" cpl="3" extension="AMX_INT8" iclass="TDPBUUD" iform="TDPBUUD_TMMu32_TMMu32_TMMu32" isa-set="AMX_INT8" string="TDPBUUD (MM, MM, MM)" url="uops.info/html-instr/TDPBUUD_MM_MM_MM.html" vex="1">
      <operand idx="1" name="REG0" r="1" type="reg" w="1" width="0" xtype="u32">TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="0" xtype="u32">TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="0" xtype="u32">TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7</operand>
    </instruction>
  </extension>
  <extension name="AMX_TILE">
    <instruction asm="LDTILECFG" category="AMX_TILE" cpl="3" extension="AMX_TILE" iclass="LDTILECFG" iform="LDTILECFG_MEM" isa-set="AMX_TILE" string="LDTILECFG (M512)" url="uops.info/html-instr/LDTILECFG_M512.html" vex="1">
      <operand idx="1" memory-prefix="zmmword ptr" name="MEM0" r="1" type="mem" width="512" xtype="i32"/>
    </instruction>
    <instruction asm="STTILECFG" category="AMX_TILE" cpl="3" extension="AMX_TILE" iclass="STTILECFG" iform="STTILECFG_MEM" isa-set="AMX_TILE" string="STTILECFG (M512)" url="uops.info/html-instr/STTILECFG_M512.html" vex="1">
      <operand idx="1" memory-prefix="zmmword ptr" name="MEM0" type="mem" w="1" width="512" xtype="i32"/>
    </instruction>
    <instruction asm="TILELOADD" category="AMX_TILE" cpl="3" extension="AMX_TILE" iclass="TILELOADD" iform="TILELOADD_TMMu32_MEMu32" isa-set="AMX_TILE" string="TILELOADD (MM, M0)" url="uops.info/html-instr/TILELOADD_MM_M0.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="0" xtype="u32">TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7</operand>
      <operand idx="2" name="MEM0" r="1" type="mem" width="0" xtype="u32"/>
    </instruction>
    <instruction asm="TILELOADDT1" category="AMX_TILE" cpl="3" extension="AMX_TILE" iclass="TILELOADDT1" iform="TILELOADDT1_TMMu32_MEMu32" isa-set="AMX_TILE" string="TILELOADDT1 (MM, M0)" url="uops.info/html-instr/TILELOADDT1_MM_M0.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="0" xtype="u32">TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7</operand>
      <operand idx="2" name="MEM0" r="1" type="mem" width="0" xtype="u32"/>
    </instruction>
    <instruction asm="TILERELEASE" category="AMX_TILE" cpl="3" extension="AMX_TILE" iclass="TILERELEASE" iform="TILERELEASE" isa-set="AMX_TILE" string="TILERELEASE" url="uops.info/html-instr/TILERELEASE.html" vex="1">
    </instruction>
    <instruction asm="TILESTORED" category="AMX_TILE" cpl="3" extension="AMX_TILE" iclass="TILESTORED" iform="TILESTORED_MEMu32_TMMu32" isa-set="AMX_TILE" string="TILESTORED (M0, MM)" url="uops.info/html-instr/TILESTORED_M0_MM.html" vex="1">
      <operand idx="1" name="MEM0" type="mem" w="1" width="0" xtype="u32"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="0" xtype="u32">TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7</operand>
    </instruction>
    <instruction asm="TILEZERO" category="AMX_TILE" cpl="3" extension="AMX_TILE" iclass="TILEZERO" iform="TILEZERO_TMMu32" isa-set="AMX_TILE" string="TILEZERO (MM)" url="uops.info/html-instr/TILEZERO_MM.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="0" xtype="u32">TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7</operand>
    </instruction>
  </extension>
  <extension name="AVX">
    <instruction asm="VADDPD" category="AVX" cpl="3" extension="AVX" iclass="VADDPD" iform="VADDPD_XMMdq_XMMdq_MEMdq" isa-set="AVX" mxcsr="1" string="VADDPD (XMM, XMM, M128)" summary="Add Packed Double-Precision Floating-Point Values" url="uops.info/html-instr/VADDPD_XMM_XMM_M128.html" url-ref="felixcloutier.com/x86/ADDPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f64"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="9" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="9" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="9" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.55" TP_unrolled_indexed="0.55" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.54" TP_unrolled_indexed="0.54" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="10.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VADDPD" category="AVX" cpl="3" extension="AVX" iclass="VADDPD" iform="VADDPD_XMMdq_XMMdq_XMMdq" isa-set="AVX" mxcsr="1" string="VADDPD (XMM, XMM, XMM)" summary="Add Packed Double-Precision Floating-Point Values" url="uops.info/html-instr/VADDPD_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/ADDPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="3" ports="FP2/3" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="3" ports="FP2/3" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="3" ports="FP2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VADDPD" category="AVX" cpl="3" extension="AVX" iclass="VADDPD" iform="VADDPD_YMMqq_YMMqq_MEMqq" isa-set="AVX" mxcsr="1" string="VADDPD (YMM, YMM, M256)" summary="Add Packed Double-Precision Floating-Point Values" url="uops.info/html-instr/VADDPD_YMM_YMM_M256.html" url-ref="felixcloutier.com/x86/ADDPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f64"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.55" TP_unrolled_indexed="0.55" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.55" TP_unrolled_indexed="0.55" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="11.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VADDPD" category="AVX" cpl="3" extension="AVX" iclass="VADDPD" iform="VADDPD_YMMqq_YMMqq_YMMqq" isa-set="AVX" mxcsr="1" string="VADDPD (YMM, YMM, YMM)" summary="Add Packed Double-Precision Floating-Point Values" url="uops.info/html-instr/VADDPD_YMM_YMM_YMM.html" url-ref="felixcloutier.com/x86/ADDPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="3" ports="FP2/3" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="3" ports="FP2/3" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="3" ports="FP2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VADDPS" category="AVX" cpl="3" extension="AVX" iclass="VADDPS" iform="VADDPS_XMMdq_XMMdq_MEMdq" isa-set="AVX" mxcsr="1" string="VADDPS (XMM, XMM, M128)" summary="Add Packed Single-Precision Floating-Point Values" url="uops.info/html-instr/VADDPS_XMM_XMM_M128.html" url-ref="felixcloutier.com/x86/ADDPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f32"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="9" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="9" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="9" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.55" TP_unrolled_indexed="0.55" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.54" TP_unrolled_indexed="0.54" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="10.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VADDPS" category="AVX" cpl="3" extension="AVX" iclass="VADDPS" iform="VADDPS_XMMdq_XMMdq_XMMdq" isa-set="AVX" mxcsr="1" string="VADDPS (XMM, XMM, XMM)" summary="Add Packed Single-Precision Floating-Point Values" url="uops.info/html-instr/VADDPS_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/ADDPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="3" ports="FP2/3" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="3" ports="FP2/3" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="3" ports="FP2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VADDPS" category="AVX" cpl="3" extension="AVX" iclass="VADDPS" iform="VADDPS_YMMqq_YMMqq_MEMqq" isa-set="AVX" mxcsr="1" string="VADDPS (YMM, YMM, M256)" summary="Add Packed Single-Precision Floating-Point Values" url="uops.info/html-instr/VADDPS_YMM_YMM_M256.html" url-ref="felixcloutier.com/x86/ADDPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f32"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.55" TP_unrolled_indexed="0.54" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.55" TP_unrolled_indexed="0.55" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="11.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VADDPS" category="AVX" cpl="3" extension="AVX" iclass="VADDPS" iform="VADDPS_YMMqq_YMMqq_YMMqq" isa-set="AVX" mxcsr="1" string="VADDPS (YMM, YMM, YMM)" summary="Add Packed Single-Precision Floating-Point Values" url="uops.info/html-instr/VADDPS_YMM_YMM_YMM.html" url-ref="felixcloutier.com/x86/ADDPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="3" ports="FP2/3" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="3" ports="FP2/3" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="3" ports="FP2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VADDSD" category="AVX" cpl="3" extension="AVX" iclass="VADDSD" iform="VADDSD_XMMdq_XMMdq_MEMq" isa-set="AVX" mxcsr="1" string="VADDSD (XMM, XMM, M64)" summary="Add Scalar Double-Precision Floating-Point Values" url="uops.info/html-instr/VADDSD_XMM_XMM_M64.html" url-ref="felixcloutier.com/x86/ADDSD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="f64"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="9" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="9" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="9" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.55" TP_unrolled_indexed="0.55" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.54" TP_unrolled_indexed="0.55" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="10.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VADDSD" category="AVX" cpl="3" extension="AVX" iclass="VADDSD" iform="VADDSD_XMMdq_XMMdq_XMMq" isa-set="AVX" mxcsr="1" string="VADDSD (XMM, XMM, XMM)" summary="Add Scalar Double-Precision Floating-Point Values" url="uops.info/html-instr/VADDSD_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/ADDSD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="64" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="3" ports="FP2/3" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="3" ports="FP2/3" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="3" ports="FP2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VADDSS" category="AVX" cpl="3" extension="AVX" iclass="VADDSS" iform="VADDSS_XMMdq_XMMdq_MEMd" isa-set="AVX" mxcsr="1" string="VADDSS (XMM, XMM, M32)" summary="Add Scalar Single-Precision Floating-Point Values" url="uops.info/html-instr/VADDSS_XMM_XMM_M32.html" url-ref="felixcloutier.com/x86/ADDSS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="dword ptr" name="MEM0" r="1" type="mem" width="32" xtype="f32"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="9" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="9" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="9" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.55" TP_unrolled_indexed="0.55" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.54" TP_unrolled_indexed="0.55" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="10.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VADDSS" category="AVX" cpl="3" extension="AVX" iclass="VADDSS" iform="VADDSS_XMMdq_XMMdq_XMMd" isa-set="AVX" mxcsr="1" string="VADDSS (XMM, XMM, XMM)" summary="Add Scalar Single-Precision Floating-Point Values" url="uops.info/html-instr/VADDSS_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/ADDSS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="32" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="3" ports="FP2/3" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="3" ports="FP2/3" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="3" ports="FP2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VADDSUBPD" category="AVX" cpl="3" extension="AVX" iclass="VADDSUBPD" iform="VADDSUBPD_XMMdq_XMMdq_MEMdq" isa-set="AVX" mxcsr="1" string="VADDSUBPD (XMM, XMM, M128)" summary="Packed Double-FP Add/Subtract" url="uops.info/html-instr/VADDSUBPD_XMM_XMM_M128.html" url-ref="felixcloutier.com/x86/ADDSUBPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f64"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="9" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="9" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="9" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.55" TP_unrolled_indexed="0.54" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.54" TP_unrolled_indexed="0.54" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="10.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VADDSUBPD" category="AVX" cpl="3" extension="AVX" iclass="VADDSUBPD" iform="VADDSUBPD_XMMdq_XMMdq_XMMdq" isa-set="AVX" mxcsr="1" string="VADDSUBPD (XMM, XMM, XMM)" summary="Packed Double-FP Add/Subtract" url="uops.info/html-instr/VADDSUBPD_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/ADDSUBPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="3" ports="FP2/3" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="3" ports="FP2/3" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="3" ports="FP2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VADDSUBPD" category="AVX" cpl="3" extension="AVX" iclass="VADDSUBPD" iform="VADDSUBPD_YMMqq_YMMqq_MEMqq" isa-set="AVX" mxcsr="1" string="VADDSUBPD (YMM, YMM, M256)" summary="Packed Double-FP Add/Subtract" url="uops.info/html-instr/VADDSUBPD_YMM_YMM_M256.html" url-ref="felixcloutier.com/x86/ADDSUBPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f64"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.55" TP_unrolled_indexed="0.55" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.54" TP_unrolled_indexed="0.55" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="11.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VADDSUBPD" category="AVX" cpl="3" extension="AVX" iclass="VADDSUBPD" iform="VADDSUBPD_YMMqq_YMMqq_YMMqq" isa-set="AVX" mxcsr="1" string="VADDSUBPD (YMM, YMM, YMM)" summary="Packed Double-FP Add/Subtract" url="uops.info/html-instr/VADDSUBPD_YMM_YMM_YMM.html" url-ref="felixcloutier.com/x86/ADDSUBPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="3" ports="FP2/3" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="3" ports="FP2/3" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="3" ports="FP2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VADDSUBPS" category="AVX" cpl="3" extension="AVX" iclass="VADDSUBPS" iform="VADDSUBPS_XMMdq_XMMdq_MEMdq" isa-set="AVX" mxcsr="1" string="VADDSUBPS (XMM, XMM, M128)" summary="Packed Single-FP Add/Subtract" url="uops.info/html-instr/VADDSUBPS_XMM_XMM_M128.html" url-ref="felixcloutier.com/x86/ADDSUBPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f32"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="9" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="9" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="9" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.55" TP_unrolled_indexed="0.55" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.54" TP_unrolled_indexed="0.55" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="10.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VADDSUBPS" category="AVX" cpl="3" extension="AVX" iclass="VADDSUBPS" iform="VADDSUBPS_XMMdq_XMMdq_XMMdq" isa-set="AVX" mxcsr="1" string="VADDSUBPS (XMM, XMM, XMM)" summary="Packed Single-FP Add/Subtract" url="uops.info/html-instr/VADDSUBPS_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/ADDSUBPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="3" ports="FP2/3" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="3" ports="FP2/3" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="3" ports="FP2/3" uops="1"/>
      </architecture>
    </instruction>
    <!--
      VADDSUBPS (YMM, YMM, M256), iform VADDSUBPS_YMMqq_YMMqq_MEMqq, isa-set AVX.
      Operand 1 (REG0) is write-only (w="1", no r attribute); operand 2 (REG1) and
      operand 3 (MEM0, width 256, memory-prefix "ymmword ptr") are read (r="1").
      Each <architecture> child holds zero or more <IACA> entries (one per tool
      version; present here only for SNB through SKX), exactly one <measurement>,
      and for ICL an additional <doc> entry.
      Every <measurement> has two <latency> children: operand 2 to operand 1
      (cycles) and operand 3 to operand 1 (cycles_addr, cycles_addr_index and
      cycles_mem, each paired with an *_is_upper_bound="1" flag).
      NOTE(review): the precise meaning of the *_indexed and *_is_upper_bound
      attributes is not defined in this file; confirm against the uops.info
      documentation before relying on them.
    -->
    <instruction asm="VADDSUBPS" category="AVX" cpl="3" extension="AVX" iclass="VADDSUBPS" iform="VADDSUBPS_YMMqq_YMMqq_MEMqq" isa-set="AVX" mxcsr="1" string="VADDSUBPS (YMM, YMM, M256)" summary="Packed Single-FP Add/Subtract" url="uops.info/html-instr/VADDSUBPS_YMM_YMM_M256.html" url-ref="felixcloutier.com/x86/ADDSUBPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f32"/>
      <!-- In every Intel measurement below, uops_retire_slots is 1 while uops_retire_slots_indexed is 2. NOTE(review): presumably the indexed addressing form occupies an extra retire slot; confirm. -->
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.55" TP_unrolled_indexed="0.55" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.54" TP_unrolled_indexed="0.55" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="11.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <!-- The ZEN+ measurement carries no ports or TP_ports attributes and reports uops="2" at TP 1.00; ZEN2 and ZEN3 report uops="1" on ports="1*FP23" at TP 0.50. None of the ZEN entries has the *_indexed, uops_MITE, uops_MS or uops_retire_slots attributes. -->
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <!--
      VADDSUBPS (YMM, YMM, YMM), iform VADDSUBPS_YMMqq_YMMqq_YMMqq, isa-set AVX.
      Register-only form: operand 1 (REG0) is write-only (w="1", no r attribute);
      operands 2 (REG1) and 3 (REG2) are read (r="1"). All three are 256 bits wide.
      <IACA> entries exist only for SNB through SKX; <doc> entries exist for ICL,
      ZEN+, ZEN2 and ZEN3.
      Within each architecture the measured latency to operand 1 is the same from
      operand 2 and from operand 3: 3 cycles on SNB, IVB, HSW, BDW and the three
      ZEN entries, 4 cycles on SKL through RKL.
      Measured TP_loop is 1.00 on ports="1*p1" (SNB through BDW) and 0.50 on
      ports="1*p01" (SKL through RKL).
    -->
    <instruction asm="VADDSUBPS" category="AVX" cpl="3" extension="AVX" iclass="VADDSUBPS" iform="VADDSUBPS_YMMqq_YMMqq_YMMqq" isa-set="AVX" mxcsr="1" string="VADDSUBPS (YMM, YMM, YMM)" summary="Packed Single-FP Add/Subtract" url="uops.info/html-instr/VADDSUBPS_YMM_YMM_YMM.html" url-ref="felixcloutier.com/x86/ADDSUBPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <!-- The ZEN+ measurement has no ports or TP_ports attributes and reports uops="2" at TP 1.00; its <doc> entry likewise gives uops="2" and TP="1.00" with ports="FP2/3". ZEN2 and ZEN3 report uops="1" at TP 0.50 in both <measurement> and <doc>. -->
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="3" ports="FP2/3" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="3" ports="FP2/3" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="3" ports="FP2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VANDNPD" category="LOGICAL_FP" cpl="3" extension="AVX" iclass="VANDNPD" iform="VANDNPD_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VANDNPD (XMM, XMM, M128)" summary="Bitwise Logical AND NOT of Packed Double Precision Floating-Point Values" url="uops.info/html-instr/VANDNPD_XMM_XMM_M128.html" url-ref="felixcloutier.com/x86/ANDNPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="u64"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="7" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="7" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="7" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VANDNPD" category="LOGICAL_FP" cpl="3" extension="AVX" iclass="VANDNPD" iform="VANDNPD_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VANDNPD (XMM, XMM, XMM)" summary="Bitwise Logical AND NOT of Packed Double Precision Floating-Point Values" url="uops.info/html-instr/VANDNPD_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/ANDNPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="u64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="3.0"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="3.0"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.33" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.25" TP_ports="0.25" TP_unrolled="0.25" TP_unrolled_same_reg="0.25" ports="1*FP0123" uops="1" uops_same_reg="1">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.25" TP_ports="0.25" TP_unrolled="0.25" TP_unrolled_same_reg="0.25" ports="1*FP0123" uops="1" uops_same_reg="1">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.17" TP_ports="0.25" TP_unrolled="0.25" TP_unrolled_same_reg="0.25" ports="1*FP0123" uops="1" uops_same_reg="1">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FP0/1/2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VANDNPD" category="LOGICAL_FP" cpl="3" extension="AVX" iclass="VANDNPD" iform="VANDNPD_YMMqq_YMMqq_MEMqq" isa-set="AVX" string="VANDNPD (YMM, YMM, M256)" summary="Bitwise Logical AND NOT of Packed Double Precision Floating-Point Values" url="uops.info/html-instr/VANDNPD_YMM_YMM_M256.html" url-ref="felixcloutier.com/x86/ANDNPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="u64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="u64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="u64"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="8" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="8" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="8" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="8.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VANDNPD" category="LOGICAL_FP" cpl="3" extension="AVX" iclass="VANDNPD" iform="VANDNPD_YMMqq_YMMqq_YMMqq" isa-set="AVX" string="VANDNPD (YMM, YMM, YMM)" summary="Bitwise Logical AND NOT of Packed Double Precision Floating-Point Values" url="uops.info/html-instr/VANDNPD_YMM_YMM_YMM.html" url-ref="felixcloutier.com/x86/ANDNPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="u64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="u64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="256" xtype="u64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="3.0"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="3.0"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.33" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="2">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FPU" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.25" TP_ports="0.25" TP_unrolled="0.25" TP_unrolled_same_reg="0.25" ports="1*FP0123" uops="1" uops_same_reg="1">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.17" TP_ports="0.25" TP_unrolled="0.25" TP_unrolled_same_reg="0.25" ports="1*FP0123" uops="1" uops_same_reg="1">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FP0/1/2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VANDNPS" category="LOGICAL_FP" cpl="3" extension="AVX" iclass="VANDNPS" iform="VANDNPS_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VANDNPS (XMM, XMM, M128)" summary="Bitwise Logical AND NOT of Packed Single Precision Floating-Point Values" url="uops.info/html-instr/VANDNPS_XMM_XMM_M128.html" url-ref="felixcloutier.com/x86/ANDNPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i32"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="7" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="7" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="7" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VANDNPS" category="LOGICAL_FP" cpl="3" extension="AVX" iclass="VANDNPS" iform="VANDNPS_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VANDNPS (XMM, XMM, XMM)" summary="Bitwise Logical AND NOT of Packed Single Precision Floating-Point Values" url="uops.info/html-instr/VANDNPS_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/ANDNPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="3.0"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="3.0"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.33" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.25" TP_ports="0.25" TP_unrolled="0.25" TP_unrolled_same_reg="0.25" ports="1*FP0123" uops="1" uops_same_reg="1">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.25" TP_ports="0.25" TP_unrolled="0.25" TP_unrolled_same_reg="0.25" ports="1*FP0123" uops="1" uops_same_reg="1">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.17" TP_ports="0.25" TP_unrolled="0.25" TP_unrolled_same_reg="0.25" ports="1*FP0123" uops="1" uops_same_reg="1">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FP0/1/2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VANDNPS" category="LOGICAL_FP" cpl="3" extension="AVX" iclass="VANDNPS" iform="VANDNPS_YMMqq_YMMqq_MEMqq" isa-set="AVX" string="VANDNPS (YMM, YMM, M256)" summary="Bitwise Logical AND NOT of Packed Single Precision Floating-Point Values" url="uops.info/html-instr/VANDNPS_YMM_YMM_M256.html" url-ref="felixcloutier.com/x86/ANDNPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="i32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="i32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="i32"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="8" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="8" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="8" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="8.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VANDNPS" category="LOGICAL_FP" cpl="3" extension="AVX" iclass="VANDNPS" iform="VANDNPS_YMMqq_YMMqq_YMMqq" isa-set="AVX" string="VANDNPS (YMM, YMM, YMM)" summary="Bitwise Logical AND NOT of Packed Single Precision Floating-Point Values" url="uops.info/html-instr/VANDNPS_YMM_YMM_YMM.html" url-ref="felixcloutier.com/x86/ANDNPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="i32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="i32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="256" xtype="i32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="3.0"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="3.0"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.33" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="2">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FPU" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.25" TP_ports="0.25" TP_unrolled="0.25" TP_unrolled_same_reg="0.25" ports="1*FP0123" uops="1" uops_same_reg="1">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.17" TP_ports="0.25" TP_unrolled="0.25" TP_unrolled_same_reg="0.25" ports="1*FP0123" uops="1" uops_same_reg="1">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FP0/1/2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VANDPD" category="LOGICAL_FP" cpl="3" extension="AVX" iclass="VANDPD" iform="VANDPD_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VANDPD (XMM, XMM, M128)" summary="Bitwise Logical AND of Packed Double Precision Floating-Point Values" url="uops.info/html-instr/VANDPD_XMM_XMM_M128.html" url-ref="felixcloutier.com/x86/ANDPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="u64"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="7" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="7" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="7" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VANDPD" category="LOGICAL_FP" cpl="3" extension="AVX" iclass="VANDPD" iform="VANDPD_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VANDPD (XMM, XMM, XMM)" summary="Bitwise Logical AND of Packed Double Precision Floating-Point Values" url="uops.info/html-instr/VANDPD_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/ANDPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="u64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="3.0"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="3.0"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.33" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.25" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.25" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.25" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FP0/1/2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VANDPD" category="LOGICAL_FP" cpl="3" extension="AVX" iclass="VANDPD" iform="VANDPD_YMMqq_YMMqq_MEMqq" isa-set="AVX" string="VANDPD (YMM, YMM, M256)" summary="Bitwise Logical AND of Packed Double Precision Floating-Point Values" url="uops.info/html-instr/VANDPD_YMM_YMM_M256.html" url-ref="felixcloutier.com/x86/ANDPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="u64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="u64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="u64"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="8" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="8" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="8" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="8.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VANDPD" category="LOGICAL_FP" cpl="3" extension="AVX" iclass="VANDPD" iform="VANDPD_YMMqq_YMMqq_YMMqq" isa-set="AVX" string="VANDPD (YMM, YMM, YMM)" summary="Bitwise Logical AND of Packed Double Precision Floating-Point Values" url="uops.info/html-instr/VANDPD_YMM_YMM_YMM.html" url-ref="felixcloutier.com/x86/ANDPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="u64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="u64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="256" xtype="u64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="3.0"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="3.0"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.33" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FPU" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.25" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.25" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FP0/1/2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VANDPS" category="LOGICAL_FP" cpl="3" extension="AVX" iclass="VANDPS" iform="VANDPS_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VANDPS (XMM, XMM, M128)" summary="Bitwise Logical AND of Packed Single Precision Floating-Point Values" url="uops.info/html-instr/VANDPS_XMM_XMM_M128.html" url-ref="felixcloutier.com/x86/ANDPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i32"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="7" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="7" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="7" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VANDPS" category="LOGICAL_FP" cpl="3" extension="AVX" iclass="VANDPS" iform="VANDPS_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VANDPS (XMM, XMM, XMM)" summary="Bitwise Logical AND of Packed Single Precision Floating-Point Values" url="uops.info/html-instr/VANDPS_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/ANDPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="3.0"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="3.0"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.33" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.25" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.25" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.25" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FP0/1/2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VANDPS" category="LOGICAL_FP" cpl="3" extension="AVX" iclass="VANDPS" iform="VANDPS_YMMqq_YMMqq_MEMqq" isa-set="AVX" string="VANDPS (YMM, YMM, M256)" summary="Bitwise Logical AND of Packed Single Precision Floating-Point Values" url="uops.info/html-instr/VANDPS_YMM_YMM_M256.html" url-ref="felixcloutier.com/x86/ANDPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="i32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="i32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="i32"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="8" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="8" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="8" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="8.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VANDPS" category="LOGICAL_FP" cpl="3" extension="AVX" iclass="VANDPS" iform="VANDPS_YMMqq_YMMqq_YMMqq" isa-set="AVX" string="VANDPS (YMM, YMM, YMM)" summary="Bitwise Logical AND of Packed Single Precision Floating-Point Values" url="uops.info/html-instr/VANDPS_YMM_YMM_YMM.html" url-ref="felixcloutier.com/x86/ANDPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="i32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="i32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="256" xtype="i32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="3.0"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="3.0"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.33" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FPU" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.25" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.25" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FP0/1/2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VBLENDPD" category="AVX" cpl="3" extension="AVX" iclass="VBLENDPD" iform="VBLENDPD_XMMdq_XMMdq_MEMdq_IMMb" isa-set="AVX" string="VBLENDPD (XMM, XMM, M128, I8)" summary="Blend Packed Double Precision Floating-Point Values" url="uops.info/html-instr/VBLENDPD_XMM_XMM_M128_I8.html" url-ref="felixcloutier.com/x86/BLENDPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f64"/>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="7" ports="1*p05+1*p23" uops="2" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p05+1*p23" ports_indexed="1*p05+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p05+1*p23" ports_indexed="1*p05+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p05+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="7" ports="1*p05+1*p23" uops="2" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p05+1*p23" ports_indexed="1*p05+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p05+1*p23" ports_indexed="1*p05+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p05+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="7" ports="1*p015+1*p23" uops="2" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p015+1*p23" uops="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p015+1*p23" uops="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VBLENDPD" category="AVX" cpl="3" extension="AVX" iclass="VBLENDPD" iform="VBLENDPD_XMMdq_XMMdq_XMMdq_IMMb" isa-set="AVX" string="VBLENDPD (XMM, XMM, XMM, I8)" summary="Blend Packed Double Precision Floating-Point Values" url="uops.info/html-instr/VBLENDPD_XMM_XMM_XMM_I8.html" url-ref="felixcloutier.com/x86/BLENDPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p05" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p05" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p05" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p05" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p05" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p05" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p05" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p05" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.34" TP_no_interiteration="0.35" TP_ports="0.33" latency="1" ports="1*p015" uops="1" version="2.1"/>
        <IACA TP="0.34" TP_no_interiteration="0.35" TP_ports="0.33" ports="1*p015" uops="1" version="2.2"/>
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.34" TP_no_interiteration="0.35" TP_ports="0.33" ports="1*p015" uops="1" version="2.2"/>
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.37" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="3.0"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.38" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="3.0"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.38" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.38" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.38" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.38" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.38" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.37" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.33" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.37" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.37" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.37" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FP0/1/2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VBLENDPD" category="AVX" cpl="3" extension="AVX" iclass="VBLENDPD" iform="VBLENDPD_YMMqq_YMMqq_MEMqq_IMMb" isa-set="AVX" string="VBLENDPD (YMM, YMM, M256, I8)" summary="Blend Packed Double Precision Floating-Point Values" url="uops.info/html-instr/VBLENDPD_YMM_YMM_M256_I8.html" url-ref="felixcloutier.com/x86/BLENDPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f64"/>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="0.50" latency="8" ports="1*p05+1*p23" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p05+1*p23" ports_indexed="1*p05+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p05+1*p23" ports_indexed="1*p05+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p05+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="0.50" latency="8" ports="1*p05+1*p23" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p05+1*p23" ports_indexed="1*p05+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p05+1*p23" ports_indexed="1*p05+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p05+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="8" ports="1*p015+1*p23" uops="2" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p015+1*p23" uops="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p015+1*p23" uops="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="8.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VBLENDPD" category="AVX" cpl="3" extension="AVX" iclass="VBLENDPD" iform="VBLENDPD_YMMqq_YMMqq_YMMqq_IMMb" isa-set="AVX" string="VBLENDPD (YMM, YMM, YMM, I8)" summary="Blend Packed Double Precision Floating-Point Values" url="uops.info/html-instr/VBLENDPD_YMM_YMM_YMM_I8.html" url-ref="felixcloutier.com/x86/BLENDPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p05" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p05" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p05" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p05" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p05" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p05" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p05" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p05" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.34" TP_no_interiteration="0.35" TP_ports="0.33" latency="1" ports="1*p015" uops="1" version="2.1"/>
        <IACA TP="0.34" TP_no_interiteration="0.35" TP_ports="0.33" ports="1*p015" uops="1" version="2.2"/>
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.34" TP_no_interiteration="0.35" TP_ports="0.33" ports="1*p015" uops="1" version="2.2"/>
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.37" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="3.0"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.38" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="3.0"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.38" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.38" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.38" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.38" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.38" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.37" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.33" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.37" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.37" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="FP0/1" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.37" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FP0/1/2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VBLENDPS" category="AVX" cpl="3" extension="AVX" iclass="VBLENDPS" iform="VBLENDPS_XMMdq_XMMdq_MEMdq_IMMb" isa-set="AVX" string="VBLENDPS (XMM, XMM, M128, I8)" summary="Blend Packed Single Precision Floating-Point Values" url="uops.info/html-instr/VBLENDPS_XMM_XMM_M128_I8.html" url-ref="felixcloutier.com/x86/BLENDPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f32"/>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="7" ports="1*p05+1*p23" uops="2" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p05+1*p23" ports_indexed="1*p05+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p05+1*p23" ports_indexed="1*p05+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p05+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="7" ports="1*p05+1*p23" uops="2" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p05+1*p23" ports_indexed="1*p05+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p05+1*p23" ports_indexed="1*p05+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p05+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="7" ports="1*p015+1*p23" uops="2" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p015+1*p23" uops="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p015+1*p23" uops="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.33" TP_unrolled="0.50" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VBLENDPS" category="AVX" cpl="3" extension="AVX" iclass="VBLENDPS" iform="VBLENDPS_XMMdq_XMMdq_XMMdq_IMMb" isa-set="AVX" string="VBLENDPS (XMM, XMM, XMM, I8)" summary="Blend Packed Single Precision Floating-Point Values" url="uops.info/html-instr/VBLENDPS_XMM_XMM_XMM_I8.html" url-ref="felixcloutier.com/x86/BLENDPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p05" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p05" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p05" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p05" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p05" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p05" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p05" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p05" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.34" TP_no_interiteration="0.35" TP_ports="0.33" latency="1" ports="1*p015" uops="1" version="2.1"/>
        <IACA TP="0.34" TP_no_interiteration="0.35" TP_ports="0.33" ports="1*p015" uops="1" version="2.2"/>
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.34" TP_no_interiteration="0.35" TP_ports="0.33" ports="1*p015" uops="1" version="2.2"/>
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.37" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="3.0"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.38" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="3.0"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.38" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.38" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.38" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.38" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.38" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.37" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.33" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.37" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.37" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.38" TP_ports="0.33" TP_unrolled="0.38" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.37" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FP0/1/2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VBLENDPS" category="AVX" cpl="3" extension="AVX" iclass="VBLENDPS" iform="VBLENDPS_YMMqq_YMMqq_MEMqq_IMMb" isa-set="AVX" string="VBLENDPS (YMM, YMM, M256, I8)" summary="Blend Packed Single Precision Floating-Point Values" url="uops.info/html-instr/VBLENDPS_YMM_YMM_M256_I8.html" url-ref="felixcloutier.com/x86/BLENDPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f32"/>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="0.50" latency="8" ports="1*p05+1*p23" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p05+1*p23" ports_indexed="1*p05+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p05+1*p23" ports_indexed="1*p05+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p05+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="0.50" latency="8" ports="1*p05+1*p23" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p05+1*p23" ports_indexed="1*p05+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p05+1*p23" ports_indexed="1*p05+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p05+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="8" ports="1*p015+1*p23" uops="2" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p015+1*p23" uops="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p015+1*p23" uops="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="8.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.33" TP_unrolled="0.50" ports="1*FP013" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VBLENDPS" category="AVX" cpl="3" extension="AVX" iclass="VBLENDPS" iform="VBLENDPS_YMMqq_YMMqq_YMMqq_IMMb" isa-set="AVX" string="VBLENDPS (YMM, YMM, YMM, I8)" summary="Blend Packed Single Precision Floating-Point Values" url="uops.info/html-instr/VBLENDPS_YMM_YMM_YMM_I8.html" url-ref="felixcloutier.com/x86/BLENDPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p05" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p05" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p05" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p05" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p05" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p05" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p05" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p05" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.34" TP_no_interiteration="0.35" TP_ports="0.33" latency="1" ports="1*p015" uops="1" version="2.1"/>
        <IACA TP="0.34" TP_no_interiteration="0.35" TP_ports="0.33" ports="1*p015" uops="1" version="2.2"/>
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.34" TP_no_interiteration="0.35" TP_ports="0.33" ports="1*p015" uops="1" version="2.2"/>
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.38" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="3.0"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.38" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="3.0"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.38" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.38" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.38" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.38" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.38" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.37" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.33" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.37" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.37" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="FP0/1" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.38" TP_ports="0.33" TP_unrolled="0.38" ports="1*FP013" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.37" ports="1*FP0123" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FP0/1/2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VBLENDVPD" category="AVX" cpl="3" extension="AVX" iclass="VBLENDVPD" iform="VBLENDVPD_XMMdq_XMMdq_MEMdq_XMMdq" isa-set="AVX" string="VBLENDVPD (XMM, XMM, M128, XMM)" summary="Variable Blend Packed Double Precision Floating-Point Values" url="uops.info/html-instr/VBLENDVPD_XMM_XMM_M128_XMM.html" url-ref="felixcloutier.com/x86/BLENDVPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f64"/>
      <operand idx="4" name="REG2" r="1" type="reg" width="128" xtype="u64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="8" ports="2*p05+1*p23" uops="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="2*p05+1*p23" ports_indexed="2*p05+1*p23" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="2*p05+1*p23" ports_indexed="2*p05+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="2*p05+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="8" ports="2*p05+1*p23" uops="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="2*p05+1*p23" ports_indexed="2*p05+1*p23" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="2*p05+1*p23" ports_indexed="2*p05+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="2*p05+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="8" ports="1*p23+2*p5" uops="3" version="2.1"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p23+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p23+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.95" TP_indexed="0.95" TP_ports="0.67" TP_ports_indexed="0.67" fusion_occurred="1" ports="2*p015+1*p23" ports_indexed="2*p015+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="1.00" TP_indexed="2.00" TP_ports="0.67" TP_ports_indexed="0.67" fusion_occurred="1" ports="2*p015+1*p23" ports_indexed="2*p015+1*p23" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p015+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.95" TP_indexed="0.95" TP_ports="0.67" TP_ports_indexed="0.67" fusion_occurred="1" ports="2*p015+1*p23" ports_indexed="2*p015+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="1.00" TP_indexed="2.00" TP_ports="0.67" TP_ports_indexed="0.67" fusion_occurred="1" ports="2*p015+1*p23" ports_indexed="2*p015+1*p23" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p015+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p015+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p015+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p015+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p015+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p015+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
        <doc TP="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p015+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p015+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VBLENDVPD" category="AVX" cpl="3" extension="AVX" iclass="VBLENDVPD" iform="VBLENDVPD_XMMdq_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VBLENDVPD (XMM, XMM, XMM, XMM)" summary="Variable Blend Packed Double Precision Floating-Point Values" url="uops.info/html-instr/VBLENDVPD_XMM_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/BLENDVPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="4" name="REG3" r="1" type="reg" width="128" xtype="u64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="2" ports="2*p05" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="2*p05" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="2*p05" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p05" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="2" ports="2*p05" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="2*p05" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="2*p05" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p05" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="2" ports="2*p5" uops="2" version="2.1"/>
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="2*p5" uops="2" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="2*p5" uops="2" version="2.3"/>
        <IACA TP="1.90" TP_ports="2.00" ports="2*p5" uops="2" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="2*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="2*p5" uops="2" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="2*p5" uops="2" version="2.3"/>
        <IACA TP="1.90" TP_ports="2.00" ports="2*p5" uops="2" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="2*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.95" TP_ports="0.67" ports="2*p015" uops="2" version="2.3"/>
        <IACA TP="0.95" TP_ports="0.67" ports="2*p015" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="2*p015" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.95" TP_ports="0.67" ports="2*p015" uops="2" version="2.3"/>
        <IACA TP="0.95" TP_ports="0.67" ports="2*p015" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="2*p015" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="2*p015" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="2*p015" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="2*p015" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="2*p015" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="2*p015" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
        <doc TP="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="2*p015" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="2*p015" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="4" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="4" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="4" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VBLENDVPD" category="AVX" cpl="3" extension="AVX" iclass="VBLENDVPD" iform="VBLENDVPD_YMMqq_YMMqq_MEMqq_YMMqq" isa-set="AVX" string="VBLENDVPD (YMM, YMM, M256, YMM)" summary="Variable Blend Packed Double Precision Floating-Point Values" url="uops.info/html-instr/VBLENDVPD_YMM_YMM_M256_YMM.html" url-ref="felixcloutier.com/x86/BLENDVPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f64"/>
      <operand idx="4" name="REG2" r="1" type="reg" width="256" xtype="u64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="9" ports="2*p05+1*p23" uops="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="2*p05+1*p23" ports_indexed="2*p05+1*p23" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="2*p05+1*p23" ports_indexed="2*p05+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="2*p05+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="9" ports="2*p05+1*p23" uops="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="2*p05+1*p23" ports_indexed="2*p05+1*p23" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="2*p05+1*p23" ports_indexed="2*p05+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="2*p05+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="9" ports="1*p23+2*p5" uops="3" version="2.1"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p23+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p23+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.95" TP_indexed="0.95" TP_ports="0.67" TP_ports_indexed="0.67" fusion_occurred="1" ports="2*p015+1*p23" ports_indexed="2*p015+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="1.00" TP_indexed="2.00" TP_ports="0.67" TP_ports_indexed="0.67" fusion_occurred="1" ports="2*p015+1*p23" ports_indexed="2*p015+1*p23" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p015+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.95" TP_indexed="0.95" TP_ports="0.67" TP_ports_indexed="0.67" fusion_occurred="1" ports="2*p015+1*p23" ports_indexed="2*p015+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="1.00" TP_indexed="2.00" TP_ports="0.67" TP_ports_indexed="0.67" fusion_occurred="1" ports="2*p015+1*p23" ports_indexed="2*p015+1*p23" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p015+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p015+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p015+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p015+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p015+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p015+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
        <doc TP="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p015+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p015+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VBLENDVPD" category="AVX" cpl="3" extension="AVX" iclass="VBLENDVPD" iform="VBLENDVPD_YMMqq_YMMqq_YMMqq_YMMqq" isa-set="AVX" string="VBLENDVPD (YMM, YMM, YMM, YMM)" summary="Variable Blend Packed Double Precision Floating-Point Values" url="uops.info/html-instr/VBLENDVPD_YMM_YMM_YMM_YMM.html" url-ref="felixcloutier.com/x86/BLENDVPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="4" name="REG3" r="1" type="reg" width="256" xtype="u64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="2" ports="2*p05" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="2*p05" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="2*p05" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p05" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="2" ports="2*p05" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="2*p05" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="2*p05" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p05" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="2" ports="2*p5" uops="2" version="2.1"/>
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="2*p5" uops="2" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="2*p5" uops="2" version="2.3"/>
        <IACA TP="1.90" TP_ports="2.00" ports="2*p5" uops="2" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="2*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="2" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="2*p5" uops="2" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="2*p5" uops="2" version="2.3"/>
        <IACA TP="1.90" TP_ports="2.00" ports="2*p5" uops="2" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="2*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="2" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.95" TP_ports="0.67" ports="2*p015" uops="2" version="2.3"/>
        <IACA TP="0.95" TP_ports="0.67" ports="2*p015" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="2*p015" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.95" TP_ports="0.67" ports="2*p015" uops="2" version="2.3"/>
        <IACA TP="0.95" TP_ports="0.67" ports="2*p015" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="2*p015" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="2*p015" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="2*p015" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="2*p015" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="2*p015" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="2*p015" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
        <doc TP="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="2*p015" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="2*p015" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="4" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="FP0/1" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="4" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="4" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VBLENDVPS" category="AVX" cpl="3" extension="AVX" iclass="VBLENDVPS" iform="VBLENDVPS_XMMdq_XMMdq_MEMdq_XMMdq" isa-set="AVX" string="VBLENDVPS (XMM, XMM, M128, XMM)" summary="Variable Blend Packed Single Precision Floating-Point Values" url="uops.info/html-instr/VBLENDVPS_XMM_XMM_M128_XMM.html" url-ref="felixcloutier.com/x86/BLENDVPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f32"/>
      <operand idx="4" name="REG2" r="1" type="reg" width="128" xtype="u32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="8" ports="2*p05+1*p23" uops="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="2*p05+1*p23" ports_indexed="2*p05+1*p23" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="2*p05+1*p23" ports_indexed="2*p05+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="2*p05+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="8" ports="2*p05+1*p23" uops="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="2*p05+1*p23" ports_indexed="2*p05+1*p23" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="2*p05+1*p23" ports_indexed="2*p05+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="2*p05+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="8" ports="1*p23+2*p5" uops="3" version="2.1"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p23+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p23+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.95" TP_indexed="0.95" TP_ports="0.67" TP_ports_indexed="0.67" fusion_occurred="1" ports="2*p015+1*p23" ports_indexed="2*p015+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="1.00" TP_indexed="2.00" TP_ports="0.67" TP_ports_indexed="0.67" fusion_occurred="1" ports="2*p015+1*p23" ports_indexed="2*p015+1*p23" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p015+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.95" TP_indexed="0.95" TP_ports="0.67" TP_ports_indexed="0.67" fusion_occurred="1" ports="2*p015+1*p23" ports_indexed="2*p015+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="1.00" TP_indexed="2.00" TP_ports="0.67" TP_ports_indexed="0.67" fusion_occurred="1" ports="2*p015+1*p23" ports_indexed="2*p015+1*p23" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p015+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p015+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p015+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p015+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p015+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p015+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
        <doc TP="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p015+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p015+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VBLENDVPS" category="AVX" cpl="3" extension="AVX" iclass="VBLENDVPS" iform="VBLENDVPS_XMMdq_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VBLENDVPS (XMM, XMM, XMM, XMM)" summary="Variable Blend Packed Single Precision Floating-Point Values" url="uops.info/html-instr/VBLENDVPS_XMM_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/BLENDVPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="4" name="REG3" r="1" type="reg" width="128" xtype="u32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="2" ports="2*p05" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="2*p05" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="2*p05" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p05" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="2" ports="2*p05" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="2*p05" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="2*p05" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p05" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="2" ports="2*p5" uops="2" version="2.1"/>
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="2*p5" uops="2" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="2*p5" uops="2" version="2.3"/>
        <IACA TP="1.90" TP_ports="2.00" ports="2*p5" uops="2" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="2*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="2*p5" uops="2" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="2*p5" uops="2" version="2.3"/>
        <IACA TP="1.90" TP_ports="2.00" ports="2*p5" uops="2" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="2*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.95" TP_ports="0.67" ports="2*p015" uops="2" version="2.3"/>
        <IACA TP="0.95" TP_ports="0.67" ports="2*p015" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="2*p015" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.95" TP_ports="0.67" ports="2*p015" uops="2" version="2.3"/>
        <IACA TP="0.95" TP_ports="0.67" ports="2*p015" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="2*p015" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="2*p015" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="2*p015" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="2*p015" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="2*p015" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="2*p015" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
        <doc TP="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="2*p015" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="2*p015" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="4" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="4" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="4" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VBLENDVPS" category="AVX" cpl="3" extension="AVX" iclass="VBLENDVPS" iform="VBLENDVPS_YMMqq_YMMqq_MEMqq_YMMqq" isa-set="AVX" string="VBLENDVPS (YMM, YMM, M256, YMM)" summary="Variable Blend Packed Single Precision Floating-Point Values" url="uops.info/html-instr/VBLENDVPS_YMM_YMM_M256_YMM.html" url-ref="felixcloutier.com/x86/BLENDVPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f32"/>
      <operand idx="4" name="REG2" r="1" type="reg" width="256" xtype="u32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="9" ports="2*p05+1*p23" uops="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="2*p05+1*p23" ports_indexed="2*p05+1*p23" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="2*p05+1*p23" ports_indexed="2*p05+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="2*p05+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="9" ports="2*p05+1*p23" uops="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="2*p05+1*p23" ports_indexed="2*p05+1*p23" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="2*p05+1*p23" ports_indexed="2*p05+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="2*p05+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="9" ports="1*p23+2*p5" uops="3" version="2.1"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p23+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p23+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.95" TP_indexed="0.95" TP_ports="0.67" TP_ports_indexed="0.67" fusion_occurred="1" ports="2*p015+1*p23" ports_indexed="2*p015+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="1.00" TP_indexed="2.00" TP_ports="0.67" TP_ports_indexed="0.67" fusion_occurred="1" ports="2*p015+1*p23" ports_indexed="2*p015+1*p23" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p015+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.95" TP_indexed="0.95" TP_ports="0.67" TP_ports_indexed="0.67" fusion_occurred="1" ports="2*p015+1*p23" ports_indexed="2*p015+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="1.00" TP_indexed="2.00" TP_ports="0.67" TP_ports_indexed="0.67" fusion_occurred="1" ports="2*p015+1*p23" ports_indexed="2*p015+1*p23" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p015+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p015+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p015+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p015+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="0.90" available_simple_decoders="2" complex_decoder="1" ports="2*p015+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p015+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
        <doc TP="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p015+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p015+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VBLENDVPS" category="AVX" cpl="3" extension="AVX" iclass="VBLENDVPS" iform="VBLENDVPS_YMMqq_YMMqq_YMMqq_YMMqq" isa-set="AVX" string="VBLENDVPS (YMM, YMM, YMM, YMM)" summary="Variable Blend Packed Single Precision Floating-Point Values" url="uops.info/html-instr/VBLENDVPS_YMM_YMM_YMM_YMM.html" url-ref="felixcloutier.com/x86/BLENDVPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="4" name="REG3" r="1" type="reg" width="256" xtype="u32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="2" ports="2*p05" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="2*p05" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="2*p05" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p05" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="2" ports="2*p05" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="2*p05" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="2*p05" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p05" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="2" ports="2*p5" uops="2" version="2.1"/>
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="2*p5" uops="2" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="2*p5" uops="2" version="2.3"/>
        <IACA TP="1.90" TP_ports="2.00" ports="2*p5" uops="2" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="2*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="2" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="2*p5" uops="2" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="2*p5" uops="2" version="2.3"/>
        <IACA TP="1.90" TP_ports="2.00" ports="2*p5" uops="2" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="2*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="2" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.95" TP_ports="0.67" ports="2*p015" uops="2" version="2.3"/>
        <IACA TP="0.95" TP_ports="0.67" ports="2*p015" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="2*p015" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.95" TP_ports="0.67" ports="2*p015" uops="2" version="2.3"/>
        <IACA TP="0.95" TP_ports="0.67" ports="2*p015" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="2*p015" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="2*p015" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="2*p015" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="2*p015" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="2*p015" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="2*p015" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
        <doc TP="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="2*p015" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="2*p015" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="4" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="FP0/1" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="4" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="4" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VBROADCASTF128" category="BROADCAST" cpl="3" extension="AVX" iclass="VBROADCASTF128" iform="VBROADCASTF128_YMMqq_MEMdq" isa-set="AVX" string="VBROADCASTF128 (YMM, M128)" summary="Load with Broadcast Floating-Point Data" url="uops.info/html-instr/VBROADCASTF128_YMM_M128.html" url-ref="felixcloutier.com/x86/VBROADCAST.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f64"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="7" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="7" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="6" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="LD, FPU" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="LD, FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" ports="LD" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VBROADCASTSD" category="BROADCAST" cpl="3" extension="AVX" iclass="VBROADCASTSD" iform="VBROADCASTSD_YMMqq_MEMq" isa-set="AVX" string="VBROADCASTSD (YMM, M64)" summary="Load with Broadcast Floating-Point Data" url="uops.info/html-instr/VBROADCASTSD_YMM_M64.html" url-ref="felixcloutier.com/x86/VBROADCAST.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="f64"/>
      <architecture name="SNB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="6" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="LD, FPU" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="LD, FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" ports="LD" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VBROADCASTSS" category="BROADCAST" cpl="3" extension="AVX" iclass="VBROADCASTSS" iform="VBROADCASTSS_XMMdq_MEMd" isa-set="AVX" string="VBROADCASTSS (XMM, M32)" summary="Load with Broadcast Floating-Point Data" url="uops.info/html-instr/VBROADCASTSS_XMM_M32.html" url-ref="felixcloutier.com/x86/VBROADCAST.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="dword ptr" name="MEM0" r="1" type="mem" width="32" xtype="f32"/>
      <architecture name="SNB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="6" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="6" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="6" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="6.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" ports="LD" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" ports="LD" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" ports="LD" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VBROADCASTSS" category="BROADCAST" cpl="3" extension="AVX" iclass="VBROADCASTSS" iform="VBROADCASTSS_YMMqq_MEMd" isa-set="AVX" string="VBROADCASTSS (YMM, M32)" summary="Load with Broadcast Floating-Point Data" url="uops.info/html-instr/VBROADCASTSS_YMM_M32.html" url-ref="felixcloutier.com/x86/VBROADCAST.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" memory-prefix="dword ptr" name="MEM0" r="1" type="mem" width="32" xtype="f32"/>
      <architecture name="SNB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="6" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="LD, FPU" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="LD, FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" ports="LD" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VCMPPD" category="AVX" cpl="3" extension="AVX" iclass="VCMPPD" iform="VCMPPD_XMMdq_XMMdq_MEMdq_IMMb" isa-set="AVX" mxcsr="1" string="VCMPPD (XMM, XMM, M128, I8)" summary="Compare Packed Double-Precision Floating-Point Values" url="uops.info/html-instr/VCMPPD_XMM_XMM_M128_I8.html" url-ref="felixcloutier.com/x86/CMPPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f64"/>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="9" ports="1*p1+1*p23" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="9" ports="1*p1+1*p23" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="9" ports="1*p1+1*p23" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p23" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p23" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="3.0"/>
        <measurement TP_loop="0.53" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="3.0"/>
        <measurement TP_loop="0.55" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.53" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.53" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.55" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="10.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCMPPD" category="AVX" cpl="3" extension="AVX" iclass="VCMPPD" iform="VCMPPD_XMMdq_XMMdq_XMMdq_IMMb" isa-set="AVX" mxcsr="1" string="VCMPPD (XMM, XMM, XMM, I8)" summary="Compare Packed Double-Precision Floating-Point Values" url="uops.info/html-instr/VCMPPD_XMM_XMM_XMM_I8.html" url-ref="felixcloutier.com/x86/CMPPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VCMPPD" category="AVX" cpl="3" extension="AVX" iclass="VCMPPD" iform="VCMPPD_YMMqq_YMMqq_MEMqq_IMMb" isa-set="AVX" mxcsr="1" string="VCMPPD (YMM, YMM, M256, I8)" summary="Compare Packed Double-Precision Floating-Point Values" url="uops.info/html-instr/VCMPPD_YMM_YMM_M256_I8.html" url-ref="felixcloutier.com/x86/CMPPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f64"/>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="10" ports="1*p1+1*p23" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="10" ports="1*p1+1*p23" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="10" ports="1*p1+1*p23" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p23" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p23" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="3.0"/>
        <measurement TP_loop="0.53" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="3.0"/>
        <measurement TP_loop="0.55" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.53" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.53" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.55" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="11.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCMPPD" category="AVX" cpl="3" extension="AVX" iclass="VCMPPD" iform="VCMPPD_YMMqq_YMMqq_YMMqq_IMMb" isa-set="AVX" mxcsr="1" string="VCMPPD (YMM, YMM, YMM, I8)" summary="Compare Packed Double-Precision Floating-Point Values" url="uops.info/html-instr/VCMPPD_YMM_YMM_YMM_I8.html" url-ref="felixcloutier.com/x86/CMPPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="FP0/1" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VCMPPS" category="AVX" cpl="3" extension="AVX" iclass="VCMPPS" iform="VCMPPS_XMMdq_XMMdq_MEMdq_IMMb" isa-set="AVX" mxcsr="1" string="VCMPPS (XMM, XMM, M128, I8)" summary="Compare Packed Single-Precision Floating-Point Values" url="uops.info/html-instr/VCMPPS_XMM_XMM_M128_I8.html" url-ref="felixcloutier.com/x86/CMPPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f32"/>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="9" ports="1*p1+1*p23" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="9" ports="1*p1+1*p23" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="9" ports="1*p1+1*p23" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p23" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p23" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="3.0"/>
        <measurement TP_loop="0.53" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="3.0"/>
        <measurement TP_loop="0.55" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.53" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.53" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.55" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="10.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCMPPS" category="AVX" cpl="3" extension="AVX" iclass="VCMPPS" iform="VCMPPS_XMMdq_XMMdq_XMMdq_IMMb" isa-set="AVX" mxcsr="1" string="VCMPPS (XMM, XMM, XMM, I8)" summary="Compare Packed Single-Precision Floating-Point Values" url="uops.info/html-instr/VCMPPS_XMM_XMM_XMM_I8.html" url-ref="felixcloutier.com/x86/CMPPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VCMPPS" category="AVX" cpl="3" extension="AVX" iclass="VCMPPS" iform="VCMPPS_YMMqq_YMMqq_MEMqq_IMMb" isa-set="AVX" mxcsr="1" string="VCMPPS (YMM, YMM, M256, I8)" summary="Compare Packed Single-Precision Floating-Point Values" url="uops.info/html-instr/VCMPPS_YMM_YMM_M256_I8.html" url-ref="felixcloutier.com/x86/CMPPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f32"/>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="10" ports="1*p1+1*p23" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="10" ports="1*p1+1*p23" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="10" ports="1*p1+1*p23" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p23" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p23" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="3.0"/>
        <measurement TP_loop="0.53" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="3.0"/>
        <measurement TP_loop="0.55" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.53" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.53" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.55" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc latency="11.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCMPPS" category="AVX" cpl="3" extension="AVX" iclass="VCMPPS" iform="VCMPPS_YMMqq_YMMqq_YMMqq_IMMb" isa-set="AVX" mxcsr="1" string="VCMPPS (YMM, YMM, YMM, I8)" summary="Compare Packed Single-Precision Floating-Point Values" url="uops.info/html-instr/VCMPPS_YMM_YMM_YMM_I8.html" url-ref="felixcloutier.com/x86/CMPPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="FP0/1" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VCMPSD" category="AVX" cpl="3" extension="AVX" iclass="VCMPSD" iform="VCMPSD_XMMdq_XMMdq_MEMq_IMMb" isa-set="AVX" mxcsr="1" string="VCMPSD (XMM, XMM, M64, I8)" summary="Compare Scalar Double-Precision Floating-Point Value" url="uops.info/html-instr/VCMPSD_XMM_XMM_M64_I8.html" url-ref="felixcloutier.com/x86/CMPSD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="f64"/>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="9" ports="1*p1+1*p23" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="9" ports="1*p1+1*p23" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="9" ports="1*p1+1*p23" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p23" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p23" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="3.0"/>
        <measurement TP_loop="0.53" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="3.0"/>
        <measurement TP_loop="0.55" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.53" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.53" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.55" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="10.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCMPSD" category="AVX" cpl="3" extension="AVX" iclass="VCMPSD" iform="VCMPSD_XMMdq_XMMdq_XMMq_IMMb" isa-set="AVX" mxcsr="1" string="VCMPSD (XMM, XMM, XMM, I8)" summary="Compare Scalar Double-Precision Floating-Point Value" url="uops.info/html-instr/VCMPSD_XMM_XMM_XMM_I8.html" url-ref="felixcloutier.com/x86/CMPSD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="64" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VCMPSS" category="AVX" cpl="3" extension="AVX" iclass="VCMPSS" iform="VCMPSS_XMMdq_XMMdq_MEMd_IMMb" isa-set="AVX" mxcsr="1" string="VCMPSS (XMM, XMM, M32, I8)" summary="Compare Scalar Single-Precision Floating-Point Value" url="uops.info/html-instr/VCMPSS_XMM_XMM_M32_I8.html" url-ref="felixcloutier.com/x86/CMPSS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="dword ptr" name="MEM0" r="1" type="mem" width="32" xtype="f32"/>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="9" ports="1*p1+1*p23" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="9" ports="1*p1+1*p23" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="9" ports="1*p1+1*p23" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p23" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p23" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="3.0"/>
        <measurement TP_loop="0.53" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="3.0"/>
        <measurement TP_loop="0.55" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.53" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.53" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.55" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="10.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCMPSS" category="AVX" cpl="3" extension="AVX" iclass="VCMPSS" iform="VCMPSS_XMMdq_XMMdq_XMMd_IMMb" isa-set="AVX" mxcsr="1" string="VCMPSS (XMM, XMM, XMM, I8)" summary="Compare Scalar Single-Precision Floating-Point Value" url="uops.info/html-instr/VCMPSS_XMM_XMM_XMM_I8.html" url-ref="felixcloutier.com/x86/CMPSS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="32" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VCOMISD" category="AVX" cpl="3" extension="AVX" iclass="VCOMISD" iform="VCOMISD_XMMq_MEMq" isa-set="AVX" mxcsr="1" string="VCOMISD (XMM, M64)" summary="Compare Scalar Ordered Double-Precision Floating-Point Values and Set EFLAGS" url="uops.info/html-instr/VCOMISD_XMM_M64.html" url-ref="felixcloutier.com/x86/COMISD.html" vex="1">
      <operand idx="1" name="REG0" r="1" type="reg" width="64" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="f64"/>
      <operand flag_AF="w" flag_CF="w" flag_OF="w" flag_PF="w" flag_SF="w" flag_ZF="w" idx="3" name="REG1" suppressed="1" type="flags" w="1"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="8" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles="2" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="8" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles="2" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="9" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
        <doc TP="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_mem="13" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP23" uops="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCOMISD" category="AVX" cpl="3" extension="AVX" iclass="VCOMISD" iform="VCOMISD_XMMq_XMMq" isa-set="AVX" mxcsr="1" string="VCOMISD (XMM, XMM)" summary="Compare Scalar Ordered Double-Precision Floating-Point Values and Set EFLAGS" url="uops.info/html-instr/VCOMISD_XMM_XMM.html" url-ref="felixcloutier.com/x86/COMISD.html" vex="1">
      <operand idx="1" name="REG0" r="1" type="reg" width="64" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand flag_AF="w" flag_CF="w" flag_OF="w" flag_PF="w" flag_SF="w" flag_ZF="w" idx="3" name="REG2" suppressed="1" type="flags" w="1"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="2" ports="1*p0+1*p1" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p1" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles="2" cycles_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="2" ports="1*p0+1*p1" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p1" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles="2" cycles_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p0" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p0" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p0" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
        <doc TP="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
        <doc TP="1.00" latency="2" ports="FP0/1, FP2" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
        <doc TP="1.00" latency="2" ports="FP0/1,FP2" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP23" uops="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
        <doc TP="1.00" latency="2" ports="FP2/3, FP4" uops="2"/>
      </architecture>
    </instruction>
    <instruction asm="VCOMISS" category="AVX" cpl="3" extension="AVX" iclass="VCOMISS" iform="VCOMISS_XMMd_MEMd" isa-set="AVX" mxcsr="1" string="VCOMISS (XMM, M32)" summary="Compare Scalar Ordered Single-Precision Floating-Point Values and Set EFLAGS" url="uops.info/html-instr/VCOMISS_XMM_M32.html" url-ref="felixcloutier.com/x86/COMISS.html" vex="1">
      <operand idx="1" name="REG0" r="1" type="reg" width="32" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="dword ptr" name="MEM0" r="1" type="mem" width="32" xtype="f32"/>
      <operand flag_AF="w" flag_CF="w" flag_OF="w" flag_PF="w" flag_SF="w" flag_ZF="w" idx="3" name="REG1" suppressed="1" type="flags" w="1"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="8" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles="2" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="8" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles="2" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="9" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
        <doc TP="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_mem="13" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP23" uops="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCOMISS" category="AVX" cpl="3" extension="AVX" iclass="VCOMISS" iform="VCOMISS_XMMd_XMMd" isa-set="AVX" mxcsr="1" string="VCOMISS (XMM, XMM)" summary="Compare Scalar Ordered Single-Precision Floating-Point Values and Set EFLAGS" url="uops.info/html-instr/VCOMISS_XMM_XMM.html" url-ref="felixcloutier.com/x86/COMISS.html" vex="1">
      <operand idx="1" name="REG0" r="1" type="reg" width="32" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="32" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand flag_AF="w" flag_CF="w" flag_OF="w" flag_PF="w" flag_SF="w" flag_ZF="w" idx="3" name="REG2" suppressed="1" type="flags" w="1"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="2" ports="1*p0+1*p1" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p1" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles="2" cycles_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="2" ports="1*p0+1*p1" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p1" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles="2" cycles_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p0" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p0" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p0" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
        <doc TP="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
        <doc TP="1.00" latency="2" ports="FP0/1, FP2" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
        <doc TP="1.00" latency="2" ports="FP0/1,FP2" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP23" uops="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="1" target_op="3"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="3"/>
        </measurement>
        <doc TP="1.00" latency="2" ports="FP2/3, FP4" uops="2"/>
      </architecture>
    </instruction>
    <instruction asm="VCVTDQ2PD" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTDQ2PD" iform="VCVTDQ2PD_XMMdq_MEMq" isa-set="AVX" string="VCVTDQ2PD (XMM, M64)" summary="Convert Packed Doubleword Integers to Packed Double-Precision Floating-Point Values" url="uops.info/html-instr/VCVTDQ2PD_XMM_M64.html" url-ref="felixcloutier.com/x86/CVTDQ2PD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="i32"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p23+1*p5" uops="3" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p23+1*p5" uops="3" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="0" available_simple_decoders_indexed="0" complex_decoder="1" complex_decoder_indexed="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p23+1*p5" uops="3" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p23+1*p5" uops="3" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p23+1*p5" uops="3" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p23+1*p5" uops="3" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p23+1*p5" uops="3" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p23+1*p5" uops="3" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p23+1*p5" uops="3" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23+1*p5" ports_indexed="1*p0+1*p23+1*p5" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p23+1*p5" uops="3" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23+1*p5" ports_indexed="1*p0+1*p23+1*p5" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.55" TP_unrolled_indexed="0.54" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.52" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.54" TP_unrolled_indexed="0.54" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTDQ2PD" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTDQ2PD" iform="VCVTDQ2PD_XMMdq_XMMq" isa-set="AVX" string="VCVTDQ2PD (XMM, XMM)" summary="Convert Packed Doubleword Integers to Packed Double-Precision Floating-Point Values" url="uops.info/html-instr/VCVTDQ2PD_XMM_XMM.html" url-ref="felixcloutier.com/x86/CVTDQ2PD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="4" ports="1*p1+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="4" ports="1*p1+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="4" ports="1*p1+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.97" TP_ports="1.00" ports="1*p1" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.97" TP_ports="1.00" ports="1*p1" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.97" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.97" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.0" latency="5.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="7" ports="FP1/2, FP3" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="3" ports="FP1/2, FP3" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="3" ports="FP2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VCVTDQ2PD" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTDQ2PD" iform="VCVTDQ2PD_YMMqq_MEMdq" isa-set="AVX" string="VCVTDQ2PD (YMM, M128)" summary="Convert Packed Doubleword Integers to Packed Double-Precision Floating-Point Values" url="uops.info/html-instr/VCVTDQ2PD_YMM_M128.html" url-ref="felixcloutier.com/x86/CVTDQ2PD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i32"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p23+1*p5" uops="3" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p23+1*p5" uops="3" version="2.3"/>
        <measurement TP_loop="1.02" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p23+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p23+1*p5" uops="3" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p23+1*p5" uops="3" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p23+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p23+1*p5" uops="3" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p23+1*p5" uops="3" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p23+1*p5" uops="3" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p23+1*p5" uops="3" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p23+1*p5" uops="3" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23+1*p5" ports_indexed="1*p0+1*p23+1*p5" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p23+1*p5" uops="3" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23+1*p5" ports_indexed="1*p0+1*p23+1*p5" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.54" TP_unrolled_indexed="0.54" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.54" TP_unrolled_indexed="0.55" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="3.00" TP_unrolled="3.00" uops="7">
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTDQ2PD" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTDQ2PD" iform="VCVTDQ2PD_YMMqq_XMMdq" isa-set="AVX" string="VCVTDQ2PD (YMM, XMM)" summary="Convert Packed Doubleword Integers to Packed Double-Precision Floating-Point Values" url="uops.info/html-instr/VCVTDQ2PD_YMM_XMM.html" url-ref="felixcloutier.com/x86/CVTDQ2PD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="4" ports="1*p1+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="4" ports="1*p1+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="4" ports="1*p1+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.97" TP_ports="1.00" ports="1*p1" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="6" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.97" TP_ports="1.00" ports="1*p1" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="6" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.97" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.97" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.0" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP01+1*FP23" uops="2">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTDQ2PS" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTDQ2PS" iform="VCVTDQ2PS_XMMdq_MEMdq" isa-set="AVX" mxcsr="1" string="VCVTDQ2PS (XMM, M128)" summary="Convert Packed Doubleword Integers to Packed Single-Precision Floating-Point Values" url="uops.info/html-instr/VCVTDQ2PS_XMM_M128.html" url-ref="felixcloutier.com/x86/CVTDQ2PS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i32"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="9" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="9" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="9" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.55" TP_unrolled_indexed="0.55" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.52" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.52" TP_unrolled_indexed="0.52" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.55" TP_unrolled_indexed="0.55" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="10.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="1">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTDQ2PS" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTDQ2PS" iform="VCVTDQ2PS_XMMdq_XMMdq" isa-set="AVX" mxcsr="1" string="VCVTDQ2PS (XMM, XMM)" summary="Convert Packed Doubleword Integers to Packed Single-Precision Floating-Point Values" url="uops.info/html-instr/VCVTDQ2PS_XMM_XMM.html" url-ref="felixcloutier.com/x86/CVTDQ2PS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="4" ports="FP3" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="3" ports="FP3" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="3" ports="FP2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VCVTDQ2PS" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTDQ2PS" iform="VCVTDQ2PS_YMMqq_MEMqq" isa-set="AVX" mxcsr="1" string="VCVTDQ2PS (YMM, M256)" summary="Convert Packed Doubleword Integers to Packed Single-Precision Floating-Point Values" url="uops.info/html-instr/VCVTDQ2PS_YMM_M256.html" url-ref="felixcloutier.com/x86/CVTDQ2PS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="i32"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.54" TP_unrolled_indexed="0.55" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.52" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.52" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.54" TP_unrolled_indexed="0.54" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="11.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTDQ2PS" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTDQ2PS" iform="VCVTDQ2PS_YMMqq_YMMqq" isa-set="AVX" mxcsr="1" string="VCVTDQ2PS (YMM, YMM)" summary="Convert Packed Doubleword Integers to Packed Single-Precision Floating-Point Values" url="uops.info/html-instr/VCVTDQ2PS_YMM_YMM.html" url-ref="felixcloutier.com/x86/CVTDQ2PS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="i32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="2">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="2.00" latency="4" ports="FP3" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="3" ports="FP3" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="3" ports="FP2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VCVTPD2DQ" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTPD2DQ" iform="VCVTPD2DQ_XMMdq_MEMdq" isa-set="AVX" mxcsr="1" string="VCVTPD2DQ (XMM, M128)" summary="Convert Packed Double-Precision Floating-Point Values to Packed Doubleword Integers" url="uops.info/html-instr/VCVTPD2DQ_XMM_M128.html" url-ref="felixcloutier.com/x86/CVTPD2DQ.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f64"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_indexed="3" version="2.3"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_indexed="3" version="2.3"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_indexed="3" version="2.1"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc latency="11.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTPD2DQ" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTPD2DQ" iform="VCVTPD2DQ_XMMdq_XMMdq" isa-set="AVX" mxcsr="1" string="VCVTPD2DQ (XMM, XMM)" summary="Convert Packed Double-Precision Floating-Point Values to Packed Doubleword Integers" url="uops.info/html-instr/VCVTPD2DQ_XMM_XMM.html" url-ref="felixcloutier.com/x86/CVTPD2DQ.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="4" ports="1*p1+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="4" ports="1*p1+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="4" ports="1*p1+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.97" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.97" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p015+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.89" TP_ports="1.00" ports="1*p015+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p015+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.89" TP_ports="1.00" ports="1*p015+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
        <doc latency="5.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="7" ports="FP3, FP1/2" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="3" ports="FP3, FP1/2" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="3" ports="FP2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VCVTPD2DQ" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTPD2DQ" iform="VCVTPD2DQ_XMMdq_MEMqq" isa-set="AVX" mxcsr="1" string="VCVTPD2DQ (XMM, M256)" summary="Convert Packed Double-Precision Floating-Point Values to Packed Doubleword Integers" url="uops.info/html-instr/VCVTPD2DQ_XMM_M256.html" url-ref="felixcloutier.com/x86/CVTPD2DQ.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f64"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="11" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_indexed="3" version="2.3"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="0" available_simple_decoders_indexed="0" complex_decoder="1" complex_decoder_indexed="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="11" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_indexed="3" version="2.3"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="11" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_indexed="3" version="2.1"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="15" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="15" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="15" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="15" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="15" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="15" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="15" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="15" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="15" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles_addr="15" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP01+1*FP23" uops="2">
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="15" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTPD2DQ" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTPD2DQ" iform="VCVTPD2DQ_XMMdq_YMMqq" isa-set="AVX" mxcsr="1" string="VCVTPD2DQ (XMM, YMM)" summary="Convert Packed Double-Precision Floating-Point Values to Packed Doubleword Integers" url="uops.info/html-instr/VCVTPD2DQ_XMM_YMM.html" url-ref="felixcloutier.com/x86/CVTPD2DQ.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="4" ports="1*p1+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="4" ports="1*p1+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="4" ports="1*p1+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.97" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="6" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.97" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="6" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p015+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.84" TP_ports="1.00" ports="1*p015+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p015+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.84" TP_ports="1.00" ports="1*p015+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.0" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="6" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP01+1*FP23" uops="2">
          <latency cycles="6" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTPD2PS" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTPD2PS" iform="VCVTPD2PS_XMMdq_MEMdq" isa-set="AVX" mxcsr="1" string="VCVTPD2PS (XMM, M128)" summary="Convert Packed Double-Precision Floating-Point Values to Packed Single-Precision Floating-Point Values" url="uops.info/html-instr/VCVTPD2PS_XMM_M128.html" url-ref="felixcloutier.com/x86/CVTPD2PS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f64"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p23+1*p5" uops="3" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p23+1*p5" uops="3" version="2.3"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p23+1*p5" uops="3" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p23+1*p5" uops="3" version="2.3"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_indexed="3" version="2.1"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc latency="11.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTPD2PS" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTPD2PS" iform="VCVTPD2PS_XMMdq_XMMdq" isa-set="AVX" mxcsr="1" string="VCVTPD2PS (XMM, XMM)" summary="Convert Packed Double-Precision Floating-Point Values to Packed Single-Precision Floating-Point Values" url="uops.info/html-instr/VCVTPD2PS_XMM_XMM.html" url-ref="felixcloutier.com/x86/CVTPD2PS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="4" ports="1*p1+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="4" ports="1*p1+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="4" ports="1*p1+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.97" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.97" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p015+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.89" TP_ports="1.00" ports="1*p015+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p015+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.89" TP_ports="1.00" ports="1*p015+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
        <doc latency="5.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="6" ports="FP3" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="3" ports="FP3" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="3" ports="FP2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VCVTPD2PS" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTPD2PS" iform="VCVTPD2PS_XMMdq_MEMqq" isa-set="AVX" mxcsr="1" string="VCVTPD2PS (XMM, M256)" summary="Convert Packed Double-Precision Floating-Point Values to Packed Single-Precision Floating-Point Values" url="uops.info/html-instr/VCVTPD2PS_XMM_M256.html" url-ref="felixcloutier.com/x86/CVTPD2PS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f64"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="11" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p23+1*p5" uops="3" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p23+1*p5" uops="3" version="2.3"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="11" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p23+1*p5" uops="3" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p23+1*p5" uops="3" version="2.3"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="11" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_indexed="3" version="2.1"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="15" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="15" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="15" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="15" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="15" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="15" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="15" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="15" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="15" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="2">
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP01+1*FP23" uops="2">
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="15" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTPD2PS" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTPD2PS" iform="VCVTPD2PS_XMMdq_YMMqq" isa-set="AVX" mxcsr="1" string="VCVTPD2PS (XMM, YMM)" summary="Convert Packed Double-Precision Floating-Point Values to Packed Single-Precision Floating-Point Values" url="uops.info/html-instr/VCVTPD2PS_XMM_YMM.html" url-ref="felixcloutier.com/x86/CVTPD2PS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="4" ports="1*p1+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="4" ports="1*p1+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="4" ports="1*p1+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.97" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="6" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.97" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="6" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p015+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.84" TP_ports="1.00" ports="1*p015+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p015+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.84" TP_ports="1.00" ports="1*p015+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.0" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="2">
          <latency cycles="6" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="6" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP01+1*FP23" uops="2">
          <latency cycles="6" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTPS2DQ" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTPS2DQ" iform="VCVTPS2DQ_XMMdq_MEMdq" isa-set="AVX" mxcsr="1" string="VCVTPS2DQ (XMM, M128)" summary="Convert Packed Single-Precision Floating-Point Values to Packed Signed Doubleword Integer Values" url="uops.info/html-instr/VCVTPS2DQ_XMM_M128.html" url-ref="felixcloutier.com/x86/CVTPS2DQ.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f32"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="9" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="9" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="9" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.55" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.52" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.55" TP_unrolled_indexed="0.55" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="10.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="1">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTPS2DQ" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTPS2DQ" iform="VCVTPS2DQ_XMMdq_XMMdq" isa-set="AVX" mxcsr="1" string="VCVTPS2DQ (XMM, XMM)" summary="Convert Packed Single-Precision Floating-Point Values to Packed Signed Doubleword Integer Values" url="uops.info/html-instr/VCVTPS2DQ_XMM_XMM.html" url-ref="felixcloutier.com/x86/CVTPS2DQ.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="4" ports="FP3" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="3" ports="FP3" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="3" ports="FP2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VCVTPS2DQ" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTPS2DQ" iform="VCVTPS2DQ_YMMqq_MEMqq" isa-set="AVX" mxcsr="1" string="VCVTPS2DQ (YMM, M256)" summary="Convert Packed Single-Precision Floating-Point Values to Packed Signed Doubleword Integer Values" url="uops.info/html-instr/VCVTPS2DQ_YMM_M256.html" url-ref="felixcloutier.com/x86/CVTPS2DQ.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="i32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f32"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.54" TP_unrolled_indexed="0.55" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.55" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="11.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTPS2DQ" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTPS2DQ" iform="VCVTPS2DQ_YMMqq_YMMqq" isa-set="AVX" mxcsr="1" string="VCVTPS2DQ (YMM, YMM)" summary="Convert Packed Single-Precision Floating-Point Values to Packed Signed Doubleword Integer Values" url="uops.info/html-instr/VCVTPS2DQ_YMM_YMM.html" url-ref="felixcloutier.com/x86/CVTPS2DQ.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="i32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="2">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="2.00" latency="4" ports="FP3, FP3" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="4" ports="FP3, FP3" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="3" ports="FP2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VCVTPS2PD" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTPS2PD" iform="VCVTPS2PD_XMMdq_MEMq" isa-set="AVX" mxcsr="1" string="VCVTPS2PD (XMM, M64)" summary="Convert Packed Single-Precision Floating-Point Values to Packed Double-Precision Floating-Point Values" url="uops.info/html-instr/VCVTPS2PD_XMM_M64.html" url-ref="felixcloutier.com/x86/CVTPS2PD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="f32"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="7" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="7" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="7" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p23" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p23" uops="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p23" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p23" uops="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p015+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.55" TP_unrolled_indexed="0.54" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.52" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.52" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.55" TP_unrolled_indexed="0.55" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="10.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTPS2PD" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTPS2PD" iform="VCVTPS2PD_XMMdq_XMMq" isa-set="AVX" mxcsr="1" string="VCVTPS2PD (XMM, XMM)" summary="Convert Packed Single-Precision Floating-Point Values to Packed Double-Precision Floating-Point Values" url="uops.info/html-instr/VCVTPS2PD_XMM_XMM.html" url-ref="felixcloutier.com/x86/CVTPS2PD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="2" ports="1*p0+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="2" ports="1*p0+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="2" ports="1*p0+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.96" TP_ports="1.00" ports="1*p0" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.96" TP_ports="1.00" ports="1*p0" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p015+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.48" TP_ports="0.50" ports="1*p01" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p01+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.48" TP_ports="0.50" ports="1*p01" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
        <doc latency="5.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="3" ports="FP3" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="3" ports="FP3" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="3" ports="FP2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VCVTPS2PD" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTPS2PD" iform="VCVTPS2PD_YMMqq_MEMdq" isa-set="AVX" mxcsr="1" string="VCVTPS2PD (YMM, M128)" summary="Convert Packed Single-Precision Floating-Point Values to Packed Double-Precision Floating-Point Values" url="uops.info/html-instr/VCVTPS2PD_YMM_M128.html" url-ref="felixcloutier.com/x86/CVTPS2PD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f32"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="7" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p23+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="7" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p23+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="7" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p23+1*p5" uops="3" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p23+1*p5" uops="3" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p23+1*p5" uops="3" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p23+1*p5" uops="3" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p015+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.55" TP_unrolled_indexed="0.54" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.52" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.55" TP_unrolled_indexed="0.55" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="11.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTPS2PD" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTPS2PD" iform="VCVTPS2PD_YMMqq_XMMdq" isa-set="AVX" mxcsr="1" string="VCVTPS2PD (YMM, XMM)" summary="Convert Packed Single-Precision Floating-Point Values to Packed Double-Precision Floating-Point Values" url="uops.info/html-instr/VCVTPS2PD_YMM_XMM.html" url-ref="felixcloutier.com/x86/CVTPS2PD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="2" ports="1*p0+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="2" ports="1*p0+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="2" ports="1*p0+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.97" TP_ports="1.00" ports="1*p0" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.97" TP_ports="1.00" ports="1*p0" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p015+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.48" TP_ports="0.50" ports="1*p01" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p01+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.48" TP_ports="0.50" ports="1*p01" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.0" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="2">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP01+1*FP23" uops="2">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTSD2SI" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTSD2SI" iform="VCVTSD2SI_GPR32d_MEMq" isa-set="AVX" mxcsr="1" string="VCVTSD2SI (R32, M64)" summary="Convert Scalar Double-Precision Floating-Point Value to Doubleword Integer" url="uops.info/html-instr/VCVTSD2SI_R32_M64.html" url-ref="felixcloutier.com/x86/CVTSD2SI.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="32" xtype="i32">EAX,ECX,EDX,EBX,ESP,EBP,ESI,EDI,R8D,R9D,R10D,R11D,R12D,R13D,R14D,R15D</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="f64"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p1+1*p23" uops="3" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p1+1*p23" uops="3" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_mem="21" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p1+1*p23" uops="3" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p1+1*p23" uops="3" version="2.3"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_mem="20" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p1+1*p23" uops="3" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p1+1*p23" uops="3" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_mem="19" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p1+1*p23" uops="3" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p1+1*p23" uops="3" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_mem="21" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p015+1*p23" uops="3" version="2.3"/>
        <IACA TP="0.93" TP_indexed="0.90" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p015+1*p23" ports_indexed="1*p0+1*p015+1*p23" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="21" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p015+1*p23" uops="3" version="2.3"/>
        <IACA TP="0.93" TP_indexed="0.90" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p015+1*p23" ports_indexed="1*p0+1*p015+1*p23" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="21" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="25" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="21" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="21" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="21" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="26" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="26" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="26" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="13" cycles_addr_index="13" cycles_mem="24" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="21" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP23" uops="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="25" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTSD2SI" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTSD2SI" iform="VCVTSD2SI_GPR32d_XMMq" isa-set="AVX" mxcsr="1" string="VCVTSD2SI (R32, XMM)" summary="Convert Scalar Double-Precision Floating-Point Value to Doubleword Integer" url="uops.info/html-instr/VCVTSD2SI_R32_XMM.html" url-ref="felixcloutier.com/x86/CVTSD2SI.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="32" xtype="i32">EAX,ECX,EDX,EBX,ESP,EBP,ESI,EDI,R8D,R9D,R10D,R11D,R12D,R13D,R14D,R15D</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="5" ports="1*p0+1*p1" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p1" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="5" ports="1*p0+1*p1" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p1" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="5" ports="1*p0+1*p1" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.3"/>
        <IACA TP="0.97" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p1" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.3"/>
        <IACA TP="0.97" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p1" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p015" uops="2" version="2.3"/>
        <IACA TP="0.90" TP_ports="1.00" ports="1*p0+1*p015" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p01" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p015" uops="2" version="2.3"/>
        <IACA TP="0.90" TP_ports="1.00" ports="1*p0+1*p015" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p01" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p01" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p01" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p01" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p01" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p01" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p01" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p01" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="8" ports="FP3, FP2" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="9" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="4" ports="FP3, FP2" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP23" uops="2">
          <latency cycles="9" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="4" ports="FP2/3, FP4" uops="2"/>
      </architecture>
    </instruction>
    <instruction asm="VCVTSD2SI" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTSD2SI" iform="VCVTSD2SI_GPR64q_MEMq" isa-set="AVX" mxcsr="1" string="VCVTSD2SI (R64, M64)" summary="Convert Scalar Double-Precision Floating-Point Value to Doubleword Integer" url="uops.info/html-instr/VCVTSD2SI_R64_M64.html" url-ref="felixcloutier.com/x86/CVTSD2SI.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="64" xtype="i64">RAX,RCX,RDX,RBX,RSP,RBP,RSI,RDI,R8,R9,R10,R11,R12,R13,R14,R15</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="f64"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p1+1*p23" uops="3" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p1+1*p23" uops="3" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p1+1*p23" uops="3" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p1+1*p23" uops="3" version="2.3"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p1+1*p23" uops="3" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p1+1*p23" uops="3" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p1+1*p23" uops="3" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p1+1*p23" uops="3" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p015+1*p23" uops="3" version="2.3"/>
        <IACA TP="0.93" TP_indexed="0.90" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p015+1*p23" ports_indexed="1*p0+1*p015+1*p23" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p015+1*p23" uops="3" version="2.3"/>
        <IACA TP="0.93" TP_indexed="0.90" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p015+1*p23" ports_indexed="1*p0+1*p015+1*p23" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="13" cycles_addr_index="13" cycles_mem="15" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="15" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP23" uops="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="13" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTSD2SI" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTSD2SI" iform="VCVTSD2SI_GPR64q_XMMq" isa-set="AVX" mxcsr="1" string="VCVTSD2SI (R64, XMM)" summary="Convert Scalar Double-Precision Floating-Point Value to Doubleword Integer" url="uops.info/html-instr/VCVTSD2SI_R64_XMM.html" url-ref="felixcloutier.com/x86/CVTSD2SI.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="64" xtype="i64">RAX,RCX,RDX,RBX,RSP,RBP,RSI,RDI,R8,R9,R10,R11,R12,R13,R14,R15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="5" ports="1*p0+1*p1" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p1" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="5" ports="1*p0+1*p1" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p1" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="5" ports="1*p0+1*p1" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.3"/>
        <IACA TP="0.97" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p1" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.3"/>
        <IACA TP="0.97" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p1" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p015" uops="2" version="2.3"/>
        <IACA TP="0.90" TP_ports="1.00" ports="1*p0+1*p015" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p01" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p015" uops="2" version="2.3"/>
        <IACA TP="0.90" TP_ports="1.00" ports="1*p0+1*p015" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p01" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p01" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p01" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p01" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p01" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p01" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p01" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p01" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="8" ports="FP3, FP2" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="9" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="4" ports="FP3, FP2" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP23" uops="2">
          <latency cycles="9" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="4" ports="FP2/3, FP4" uops="2"/>
      </architecture>
    </instruction>
    <instruction asm="VCVTSD2SS" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTSD2SS" iform="VCVTSD2SS_XMMdq_XMMdq_MEMq" isa-set="AVX" mxcsr="1" string="VCVTSD2SS (XMM, XMM, M64)" summary="Convert Scalar Double-Precision Floating-Point Value to Scalar Single-Precision Floating-Point Value" url="uops.info/html-instr/VCVTSD2SS_XMM_XMM_M64.html" url-ref="felixcloutier.com/x86/CVTSD2SS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="f64"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p23+1*p5" uops="3" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p23+1*p5" uops="3" version="2.3"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p23+1*p5" uops="3" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p23+1*p5" uops="3" version="2.3"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p23+1*p5" uops="3" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p23+1*p5" uops="3" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p23+1*p5" uops="3" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p23+1*p5" uops="3" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p015+1*p23+1*p5" uops="3" version="2.3"/>
        <IACA TP="0.93" TP_indexed="0.90" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p015+1*p23+1*p5" ports_indexed="1*p015+1*p23+1*p5" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p015+1*p23+1*p5" uops="3" version="2.3"/>
        <IACA TP="0.93" TP_indexed="0.90" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p015+1*p23+1*p5" ports_indexed="1*p015+1*p23+1*p5" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc latency="11.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTSD2SS" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTSD2SS" iform="VCVTSD2SS_XMMdq_XMMdq_XMMq" isa-set="AVX" mxcsr="1" string="VCVTSD2SS (XMM, XMM, XMM)" summary="Convert Scalar Double-Precision Floating-Point Value to Scalar Single-Precision Floating-Point Value" url="uops.info/html-instr/VCVTSD2SS_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/CVTSD2SS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="64" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="4" ports="1*p1+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="4" ports="1*p1+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="4" ports="1*p1+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.97" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.97" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p015+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.89" TP_ports="1.00" ports="1*p015+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p015+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.89" TP_ports="1.00" ports="1*p015+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="4" ports="FP3" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="3" ports="FP3" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="3" ports="FP2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VCVTSI2SD" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTSI2SD" iform="VCVTSI2SD_XMMdq_XMMdq_MEMd" isa-set="AVX" mxcsr="1" string="VCVTSI2SD (XMM, XMM, M32)" summary="Convert Doubleword Integer to Scalar Double-Precision Floating-Point Value" url="uops.info/html-instr/VCVTSI2SD_XMM_XMM_M32.html" url-ref="felixcloutier.com/x86/CVTSI2SD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="dword ptr" name="MEM0" r="1" type="mem" width="32" xtype="i32"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="9" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p23" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p23" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="9" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p23" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p23" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="9" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.55" TP_unrolled_indexed="0.54" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.55" TP_unrolled_indexed="0.54" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="10.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTSI2SD" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTSI2SD" iform="VCVTSI2SD_XMMdq_XMMdq_GPR32d" isa-set="AVX" mxcsr="1" string="VCVTSI2SD (XMM, XMM, R32)" summary="Convert Doubleword Integer to Scalar Double-Precision Floating-Point Value" url="uops.info/html-instr/VCVTSI2SD_XMM_XMM_R32.html" url-ref="felixcloutier.com/x86/CVTSI2SD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="32" xtype="i32">EAX,ECX,EDX,EBX,ESP,EBP,ESI,EDI,R8D,R9D,R10D,R11D,R12D,R13D,R14D,R15D</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="4" ports="1*p1+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="4" ports="1*p1+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="4" ports="1*p1+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.97" TP_ports="1.00" ports="1*p1" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.97" TP_ports="1.00" ports="1*p1" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p015+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.48" TP_ports="0.50" ports="1*p01" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p01+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.48" TP_ports="0.50" ports="1*p01" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="7" ports="ALU2, FP3" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles="9" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="4" ports="ALU2, FP3" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="2">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles="9" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="3" ports="ALU,FP2/3" uops="2"/>
      </architecture>
    </instruction>
    <instruction asm="VCVTSI2SD" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTSI2SD" iform="VCVTSI2SD_XMMdq_XMMdq_MEMq" isa-set="AVX" mxcsr="1" string="VCVTSI2SD (XMM, XMM, M64)" summary="Convert Doubleword Integer to Scalar Double-Precision Floating-Point Value" url="uops.info/html-instr/VCVTSI2SD_XMM_XMM_M64.html" url-ref="felixcloutier.com/x86/CVTSI2SD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="i64"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="9" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p23" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p23" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="9" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p23" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p23" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="9" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.55" TP_unrolled_indexed="0.55" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.55" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="10.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTSI2SD" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTSI2SD" iform="VCVTSI2SD_XMMdq_XMMdq_GPR64q" isa-set="AVX" mxcsr="1" string="VCVTSI2SD (XMM, XMM, R64)" summary="Convert Doubleword Integer to Scalar Double-Precision Floating-Point Value" url="uops.info/html-instr/VCVTSI2SD_XMM_XMM_R64.html" url-ref="felixcloutier.com/x86/CVTSI2SD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="64" xtype="i64">RAX,RCX,RDX,RBX,RSP,RBP,RSI,RDI,R8,R9,R10,R11,R12,R13,R14,R15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="4" ports="1*p1+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="4" ports="1*p1+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="4" ports="1*p1+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.97" TP_ports="1.00" ports="1*p1" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.97" TP_ports="1.00" ports="1*p1" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p015+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.48" TP_ports="0.50" ports="1*p01" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p01+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.48" TP_ports="0.50" ports="1*p01" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="7" ports="ALU2, FP3" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles="9" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="4" ports="ALU2, FP3" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="2">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles="9" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="3" ports="ALU,FP2/3" uops="2"/>
      </architecture>
    </instruction>
    <instruction asm="VCVTSI2SS" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTSI2SS" iform="VCVTSI2SS_XMMdq_XMMdq_MEMd" isa-set="AVX" mxcsr="1" string="VCVTSI2SS (XMM, XMM, M32)" summary="Convert Doubleword Integer to Scalar Single-Precision Floating-Point Value" url="uops.info/html-instr/VCVTSI2SS_XMM_XMM_M32.html" url-ref="felixcloutier.com/x86/CVTSI2SS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="dword ptr" name="MEM0" r="1" type="mem" width="32" xtype="i32"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p23+1*p5" uops="3" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p23+1*p5" uops="3" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p23+1*p5" uops="3" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p23+1*p5" uops="3" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_indexed="3" version="2.1"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.54" TP_unrolled_indexed="0.55" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.54" TP_unrolled_indexed="0.55" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="10.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTSI2SS" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTSI2SS" iform="VCVTSI2SS_XMMdq_XMMdq_GPR32d" isa-set="AVX" mxcsr="1" string="VCVTSI2SS (XMM, XMM, R32)" summary="Convert Doubleword Integer to Scalar Single-Precision Floating-Point Value" url="uops.info/html-instr/VCVTSI2SS_XMM_XMM_R32.html" url-ref="felixcloutier.com/x86/CVTSI2SS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="32" xtype="i32">EAX,ECX,EDX,EBX,ESP,EBP,ESI,EDI,R8D,R9D,R10D,R11D,R12D,R13D,R14D,R15D</operand>
      <architecture name="SNB">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="5" ports="1*p1+2*p5" uops="3" version="2.1"/>
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="5" ports="1*p1+2*p5" uops="3" version="2.1"/>
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="5" ports="1*p1+2*p5" uops="3" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.97" TP_ports="1.00" ports="1*p1" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.97" TP_ports="1.00" ports="1*p1" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p015+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.48" TP_ports="0.50" ports="1*p01" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p01+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.48" TP_ports="0.50" ports="1*p01" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="7" ports="ALU2, FP3" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles="9" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="4" ports="ALU2, FP3" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="2">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles="9" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="3" ports="ALU,FP2/3" uops="2"/>
      </architecture>
    </instruction>
    <instruction asm="VCVTSI2SS" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTSI2SS" iform="VCVTSI2SS_XMMdq_XMMdq_MEMq" isa-set="AVX" mxcsr="1" string="VCVTSI2SS (XMM, XMM, M64)" summary="Convert Doubleword Integer to Scalar Single-Precision Floating-Point Value" url="uops.info/html-instr/VCVTSI2SS_XMM_XMM_M64.html" url-ref="felixcloutier.com/x86/CVTSI2SS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="i64"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p23+1*p5" uops="3" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p23+1*p5" uops="3" version="2.3"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p23+1*p5" uops="3" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p23+1*p5" uops="3" version="2.3"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_indexed="3" version="2.1"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc latency="11.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTSI2SS" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTSI2SS" iform="VCVTSI2SS_XMMdq_XMMdq_GPR64q" isa-set="AVX" mxcsr="1" string="VCVTSI2SS (XMM, XMM, R64)" summary="Convert Doubleword Integer to Scalar Single-Precision Floating-Point Value" url="uops.info/html-instr/VCVTSI2SS_XMM_XMM_R64.html" url-ref="felixcloutier.com/x86/CVTSI2SS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="64" xtype="i64">RAX,RCX,RDX,RBX,RSP,RBP,RSI,RDI,R8,R9,R10,R11,R12,R13,R14,R15</operand>
      <architecture name="SNB">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="5" ports="1*p1+2*p5" uops="3" version="2.1"/>
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.3"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="5" ports="1*p1+2*p5" uops="3" version="2.1"/>
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.3"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="5" ports="1*p1+2*p5" uops="3" version="2.1"/>
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.3"/>
        <IACA TP="1.94" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.3"/>
        <IACA TP="1.94" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="2.00" TP_ports="2.00" ports="1*p015+2*p5" uops="3" version="2.3"/>
        <IACA TP="1.93" TP_ports="2.00" ports="1*p015+2*p5" uops="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="8" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="2.00" TP_ports="2.00" ports="1*p015+2*p5" uops="3" version="2.3"/>
        <IACA TP="1.93" TP_ports="2.00" ports="1*p015+2*p5" uops="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="8" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="8" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="8" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="8" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="8" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="8" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="8" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="8" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="7" ports="ALU2, FP3" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles="9" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="4" ports="ALU2, FP3" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="2">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles="9" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="3" ports="ALU,FP2/3" uops="2"/>
      </architecture>
    </instruction>
    <instruction asm="VCVTSS2SD" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTSS2SD" iform="VCVTSS2SD_XMMdq_XMMdq_MEMd" isa-set="AVX" mxcsr="1" string="VCVTSS2SD (XMM, XMM, M32)" summary="Convert Scalar Single-Precision Floating-Point Value to Scalar Double-Precision Floating-Point Value" url="uops.info/html-instr/VCVTSS2SD_XMM_XMM_M32.html" url-ref="felixcloutier.com/x86/CVTSS2SD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="dword ptr" name="MEM0" r="1" type="mem" width="32" xtype="f32"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="7" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="7" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="7" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.55" TP_unrolled_indexed="0.55" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.54" TP_unrolled_indexed="0.54" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="10.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTSS2SD" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTSS2SD" iform="VCVTSS2SD_XMMdq_XMMdq_XMMd" isa-set="AVX" mxcsr="1" string="VCVTSS2SD (XMM, XMM, XMM)" summary="Convert Scalar Single-Precision Floating-Point Value to Scalar Double-Precision Floating-Point Value" url="uops.info/html-instr/VCVTSS2SD_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/CVTSS2SD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="32" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p0" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="2" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p0" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="2" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p0" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.96" TP_ports="1.00" ports="1*p0" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="2" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.96" TP_ports="1.00" ports="1*p0" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="2" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p015+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.48" TP_ports="0.50" ports="1*p01" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p01+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.48" TP_ports="0.50" ports="1*p01" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="4" ports="FP3" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="3" ports="FP3" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="3" ports="FP2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VCVTSS2SI" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTSS2SI" iform="VCVTSS2SI_GPR32d_MEMd" isa-set="AVX" mxcsr="1" string="VCVTSS2SI (R32, M32)" summary="Convert Scalar Single-Precision Floating-Point Value to Doubleword Integer" url="uops.info/html-instr/VCVTSS2SI_R32_M32.html" url-ref="felixcloutier.com/x86/CVTSS2SI.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="32" xtype="i32">EAX,ECX,EDX,EBX,ESP,EBP,ESI,EDI,R8D,R9D,R10D,R11D,R12D,R13D,R14D,R15D</operand>
      <operand idx="2" memory-prefix="dword ptr" name="MEM0" r="1" type="mem" width="32" xtype="f32"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p015+1*p23" ports_indexed="1*p0+1*p015+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="0.93" TP_indexed="0.90" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p015+1*p23" ports_indexed="1*p0+1*p015+1*p23" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p015+1*p23" ports_indexed="1*p0+1*p015+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="0.93" TP_indexed="0.90" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p015+1*p23" ports_indexed="1*p0+1*p015+1*p23" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="13" cycles_addr_index="13" cycles_mem="15" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="15" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP23" uops="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="13" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTSS2SI" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTSS2SI" iform="VCVTSS2SI_GPR32d_XMMd" isa-set="AVX" mxcsr="1" string="VCVTSS2SI (R32, XMM)" summary="Convert Scalar Single-Precision Floating-Point Value to Doubleword Integer" url="uops.info/html-instr/VCVTSS2SI_R32_XMM.html" url-ref="felixcloutier.com/x86/CVTSS2SI.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="32" xtype="i32">EAX,ECX,EDX,EBX,ESP,EBP,ESI,EDI,R8D,R9D,R10D,R11D,R12D,R13D,R14D,R15D</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="32" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="5" ports="1*p0+1*p1" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p1" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="5" ports="1*p0+1*p1" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p1" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="5" ports="1*p0+1*p1" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.3"/>
        <IACA TP="0.97" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p1" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.3"/>
        <IACA TP="0.97" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p1" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p015" uops="2" version="2.3"/>
        <IACA TP="0.90" TP_ports="1.00" ports="1*p0+1*p015" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p01" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p015" uops="2" version="2.3"/>
        <IACA TP="0.90" TP_ports="1.00" ports="1*p0+1*p015" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p01" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p01" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p01" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p01" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p01" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p01" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p01" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p01" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="8" ports="FP3, ALU0" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="9" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="4" ports="FP3, ALU0" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP23" uops="2">
          <latency cycles="9" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="4" ports="FP2/3, FP4" uops="2"/>
      </architecture>
    </instruction>
    <instruction asm="VCVTSS2SI" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTSS2SI" iform="VCVTSS2SI_GPR64q_MEMd" isa-set="AVX" mxcsr="1" string="VCVTSS2SI (R64, M32)" summary="Convert Scalar Single-Precision Floating-Point Value to Doubleword Integer" url="uops.info/html-instr/VCVTSS2SI_R64_M32.html" url-ref="felixcloutier.com/x86/CVTSS2SI.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="64" xtype="i64">RAX,RCX,RDX,RBX,RSP,RBP,RSI,RDI,R8,R9,R10,R11,R12,R13,R14,R15</operand>
      <operand idx="2" memory-prefix="dword ptr" name="MEM0" r="1" type="mem" width="32" xtype="f32"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p015+1*p23" ports_indexed="1*p0+1*p015+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="0.93" TP_indexed="0.90" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p015+1*p23" ports_indexed="1*p0+1*p015+1*p23" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p015+1*p23" ports_indexed="1*p0+1*p015+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="0.93" TP_indexed="0.90" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p015+1*p23" ports_indexed="1*p0+1*p015+1*p23" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="13" cycles_addr_index="13" cycles_mem="15" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="15" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP23" uops="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="13" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTSS2SI" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTSS2SI" iform="VCVTSS2SI_GPR64q_XMMd" isa-set="AVX" mxcsr="1" string="VCVTSS2SI (R64, XMM)" summary="Convert Scalar Single-Precision Floating-Point Value to Doubleword Integer" url="uops.info/html-instr/VCVTSS2SI_R64_XMM.html" url-ref="felixcloutier.com/x86/CVTSS2SI.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="64" xtype="i64">RAX,RCX,RDX,RBX,RSP,RBP,RSI,RDI,R8,R9,R10,R11,R12,R13,R14,R15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="32" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="5" ports="1*p0+1*p1" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p1" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="5" ports="1*p0+1*p1" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p1" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="5" ports="1*p0+1*p1" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.3"/>
        <IACA TP="0.97" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p1" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.3"/>
        <IACA TP="0.97" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p1" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p015" uops="2" version="2.3"/>
        <IACA TP="0.90" TP_ports="1.00" ports="1*p0+1*p015" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p01+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="8" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p015" uops="2" version="2.3"/>
        <IACA TP="0.90" TP_ports="1.00" ports="1*p0+1*p015" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p01+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="8" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p01+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="8" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p01+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="8" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p01+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="8" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p01+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="8" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p01+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="8" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p01+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="8" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p01+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="8" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="8" ports="FP3, ALU0" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="9" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="4" ports="FP3, ALU0" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP23" uops="2">
          <latency cycles="9" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="4" ports="FP2/3, FP4" uops="2"/>
      </architecture>
    </instruction>
    <instruction asm="VCVTTPD2DQ" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTTPD2DQ" iform="VCVTTPD2DQ_XMMdq_MEMdq" isa-set="AVX" mxcsr="1" string="VCVTTPD2DQ (XMM, M128)" summary="Convert with Truncation Packed Double-Precision Floating-Point Values to Packed Doubleword Integers" url="uops.info/html-instr/VCVTTPD2DQ_XMM_M128.html" url-ref="felixcloutier.com/x86/CVTTPD2DQ.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f64"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_indexed="3" version="2.3"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_indexed="3" version="2.3"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_indexed="3" version="2.1"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc latency="11.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTTPD2DQ" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTTPD2DQ" iform="VCVTTPD2DQ_XMMdq_XMMdq" isa-set="AVX" mxcsr="1" string="VCVTTPD2DQ (XMM, XMM)" summary="Convert with Truncation Packed Double-Precision Floating-Point Values to Packed Doubleword Integers" url="uops.info/html-instr/VCVTTPD2DQ_XMM_XMM.html" url-ref="felixcloutier.com/x86/CVTTPD2DQ.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="4" ports="1*p1+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="4" ports="1*p1+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="4" ports="1*p1+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.97" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.97" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p015+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.89" TP_ports="1.00" ports="1*p015+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p015+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.89" TP_ports="1.00" ports="1*p015+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
        <doc latency="5.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="7" ports="FP3, FP1/2" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="3" ports="FP3, FP1/2" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="3" ports="FP2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VCVTTPD2DQ" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTTPD2DQ" iform="VCVTTPD2DQ_XMMdq_MEMqq" isa-set="AVX" mxcsr="1" string="VCVTTPD2DQ (XMM, M256)" summary="Convert with Truncation Packed Double-Precision Floating-Point Values to Packed Doubleword Integers" url="uops.info/html-instr/VCVTTPD2DQ_XMM_M256.html" url-ref="felixcloutier.com/x86/CVTTPD2DQ.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f64"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="11" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_indexed="3" version="2.3"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="0" available_simple_decoders_indexed="0" complex_decoder="1" complex_decoder_indexed="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="11" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_indexed="3" version="2.3"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="11" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_indexed="3" version="2.1"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p1+1*p23+1*p5" ports_indexed="1*p1+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="15" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="15" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="15" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="15" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="15" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="15" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="15" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="15" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p01+1*p23+1*p5" ports_indexed="1*p01+1*p23+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="15" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles_addr="15" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP01+1*FP23" uops="2">
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="15" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTTPD2DQ" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTTPD2DQ" iform="VCVTTPD2DQ_XMMdq_YMMqq" isa-set="AVX" mxcsr="1" string="VCVTTPD2DQ (XMM, YMM)" summary="Convert with Truncation Packed Double-Precision Floating-Point Values to Packed Doubleword Integers" url="uops.info/html-instr/VCVTTPD2DQ_XMM_YMM.html" url-ref="felixcloutier.com/x86/CVTTPD2DQ.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="4" ports="1*p1+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="4" ports="1*p1+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="4" ports="1*p1+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.97" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="6" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.97" TP_ports="1.00" ports="1*p1+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p1+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="6" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p015+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.84" TP_ports="1.00" ports="1*p015+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p015+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.84" TP_ports="1.00" ports="1*p015+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.0" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p01+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="7" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="6" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP01+1*FP23" uops="2">
          <latency cycles="6" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTTPS2DQ" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTTPS2DQ" iform="VCVTTPS2DQ_XMMdq_MEMdq" isa-set="AVX" mxcsr="1" string="VCVTTPS2DQ (XMM, M128)" summary="Convert with Truncation Packed Single-Precision Floating-Point Values to Packed Signed Doubleword Integer Values" url="uops.info/html-instr/VCVTTPS2DQ_XMM_M128.html" url-ref="felixcloutier.com/x86/CVTTPS2DQ.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f32"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="9" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="9" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="9" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.55" TP_unrolled_indexed="0.54" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.55" TP_unrolled_indexed="0.55" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="10.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="1">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTTPS2DQ" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTTPS2DQ" iform="VCVTTPS2DQ_XMMdq_XMMdq" isa-set="AVX" mxcsr="1" string="VCVTTPS2DQ (XMM, XMM)" summary="Convert with Truncation Packed Single-Precision Floating-Point Values to Packed Signed Doubleword Integer Values" url="uops.info/html-instr/VCVTTPS2DQ_XMM_XMM.html" url-ref="felixcloutier.com/x86/CVTTPS2DQ.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="4" ports="FP3" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="3" ports="FP3" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="3" ports="FP2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VCVTTPS2DQ" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTTPS2DQ" iform="VCVTTPS2DQ_YMMqq_MEMqq" isa-set="AVX" mxcsr="1" string="VCVTTPS2DQ (YMM, M256)" summary="Convert with Truncation Packed Single-Precision Floating-Point Values to Packed Signed Doubleword Integer Values" url="uops.info/html-instr/VCVTTPS2DQ_YMM_M256.html" url-ref="felixcloutier.com/x86/CVTTPS2DQ.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="i32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f32"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.55" TP_unrolled_indexed="0.55" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.54" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc latency="11.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTTPS2DQ" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTTPS2DQ" iform="VCVTTPS2DQ_YMMqq_YMMqq" isa-set="AVX" mxcsr="1" string="VCVTTPS2DQ (YMM, YMM)" summary="Convert with Truncation Packed Single-Precision Floating-Point Values to Packed Signed Doubleword Integer Values" url="uops.info/html-instr/VCVTTPS2DQ_YMM_YMM.html" url-ref="felixcloutier.com/x86/CVTTPS2DQ.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="i32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="2">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="2.00" latency="4" ports="FP3" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP3" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="4" ports="FP3" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="3" ports="FP2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VCVTTSD2SI" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTTSD2SI" iform="VCVTTSD2SI_GPR32d_MEMq" isa-set="AVX" mxcsr="1" string="VCVTTSD2SI (R32, M64)" summary="Convert with Truncation Scalar Double-Precision Floating-Point Value to Signed Integer" url="uops.info/html-instr/VCVTTSD2SI_R32_M64.html" url-ref="felixcloutier.com/x86/CVTTSD2SI.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="32" xtype="i32">EAX,ECX,EDX,EBX,ESP,EBP,ESI,EDI,R8D,R9D,R10D,R11D,R12D,R13D,R14D,R15D</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="f64"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_mem="21" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_mem="20" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_mem="19" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_mem="21" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p015+1*p23" ports_indexed="1*p0+1*p015+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="0.93" TP_indexed="0.90" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p015+1*p23" ports_indexed="1*p0+1*p015+1*p23" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="21" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p015+1*p23" ports_indexed="1*p0+1*p015+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="0.93" TP_indexed="0.90" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p015+1*p23" ports_indexed="1*p0+1*p015+1*p23" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="21" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="21" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="21" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="21" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="21" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="26" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="26" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="26" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="13" cycles_addr_index="13" cycles_mem="24" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="21" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP23" uops="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="25" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTTSD2SI" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTTSD2SI" iform="VCVTTSD2SI_GPR32d_XMMq" isa-set="AVX" mxcsr="1" string="VCVTTSD2SI (R32, XMM)" summary="Convert with Truncation Scalar Double-Precision Floating-Point Value to Signed Integer" url="uops.info/html-instr/VCVTTSD2SI_R32_XMM.html" url-ref="felixcloutier.com/x86/CVTTSD2SI.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="32" xtype="i32">EAX,ECX,EDX,EBX,ESP,EBP,ESI,EDI,R8D,R9D,R10D,R11D,R12D,R13D,R14D,R15D</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="5" ports="1*p0+1*p1" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p1" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="5" ports="1*p0+1*p1" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p1" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="5" ports="1*p0+1*p1" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.3"/>
        <IACA TP="0.97" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p1" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.3"/>
        <IACA TP="0.97" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p1" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p015" uops="2" version="2.3"/>
        <IACA TP="0.90" TP_ports="1.00" ports="1*p0+1*p015" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p01" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p015" uops="2" version="2.3"/>
        <IACA TP="0.90" TP_ports="1.00" ports="1*p0+1*p015" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p01" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p01" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p01" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p01" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p01" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p01" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p01" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p01" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="8" ports="FP3, ALU0" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="9" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="4" ports="FP3, ALU0" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP23" uops="2">
          <latency cycles="9" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="4" ports="FP2/3, FP4" uops="2"/>
      </architecture>
    </instruction>
    <instruction asm="VCVTTSD2SI" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTTSD2SI" iform="VCVTTSD2SI_GPR64q_MEMq" isa-set="AVX" mxcsr="1" string="VCVTTSD2SI (R64, M64)" summary="Convert with Truncation Scalar Double-Precision Floating-Point Value to Signed Integer" url="uops.info/html-instr/VCVTTSD2SI_R64_M64.html" url-ref="felixcloutier.com/x86/CVTTSD2SI.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="64" xtype="i64">RAX,RCX,RDX,RBX,RSP,RBP,RSI,RDI,R8,R9,R10,R11,R12,R13,R14,R15</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="f64"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p015+1*p23" ports_indexed="1*p0+1*p015+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="0.93" TP_indexed="0.90" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p015+1*p23" ports_indexed="1*p0+1*p015+1*p23" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p015+1*p23" ports_indexed="1*p0+1*p015+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="0.93" TP_indexed="0.90" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p015+1*p23" ports_indexed="1*p0+1*p015+1*p23" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="13" cycles_addr_index="13" cycles_mem="15" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="15" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP23" uops="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="13" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTTSD2SI" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTTSD2SI" iform="VCVTTSD2SI_GPR64q_XMMq" isa-set="AVX" mxcsr="1" string="VCVTTSD2SI (R64, XMM)" summary="Convert with Truncation Scalar Double-Precision Floating-Point Value to Signed Integer" url="uops.info/html-instr/VCVTTSD2SI_R64_XMM.html" url-ref="felixcloutier.com/x86/CVTTSD2SI.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="64" xtype="i64">RAX,RCX,RDX,RBX,RSP,RBP,RSI,RDI,R8,R9,R10,R11,R12,R13,R14,R15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="5" ports="1*p0+1*p1" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p1" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="5" ports="1*p0+1*p1" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p1" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="5" ports="1*p0+1*p1" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.3"/>
        <IACA TP="0.97" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p1" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.3"/>
        <IACA TP="0.97" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p1" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p015" uops="2" version="2.3"/>
        <IACA TP="0.90" TP_ports="1.00" ports="1*p0+1*p015" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p01" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p015" uops="2" version="2.3"/>
        <IACA TP="0.90" TP_ports="1.00" ports="1*p0+1*p015" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p01" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p01" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p01" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p01" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p01" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p01" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p01" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p01" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="8" ports="FP3, ALU0" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="9" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="4" ports="FP3, ALU0" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP23" uops="2">
          <latency cycles="9" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="4" ports="FP2/3, FP4" uops="2"/>
      </architecture>
    </instruction>
    <instruction asm="VCVTTSS2SI" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTTSS2SI" iform="VCVTTSS2SI_GPR32d_MEMd" isa-set="AVX" mxcsr="1" string="VCVTTSS2SI (R32, M32)" summary="Convert with Truncation Scalar Single-Precision Floating-Point Value to Integer" url="uops.info/html-instr/VCVTTSS2SI_R32_M32.html" url-ref="felixcloutier.com/x86/CVTTSS2SI.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="32" xtype="i32">EAX,ECX,EDX,EBX,ESP,EBP,ESI,EDI,R8D,R9D,R10D,R11D,R12D,R13D,R14D,R15D</operand>
      <operand idx="2" memory-prefix="dword ptr" name="MEM0" r="1" type="mem" width="32" xtype="f32"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p015+1*p23" ports_indexed="1*p0+1*p015+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="0.93" TP_indexed="0.90" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p015+1*p23" ports_indexed="1*p0+1*p015+1*p23" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p015+1*p23" ports_indexed="1*p0+1*p015+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="0.93" TP_indexed="0.90" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p015+1*p23" ports_indexed="1*p0+1*p015+1*p23" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="13" cycles_addr_index="13" cycles_mem="15" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="15" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP23" uops="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="13" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTTSS2SI" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTTSS2SI" iform="VCVTTSS2SI_GPR32d_XMMd" isa-set="AVX" mxcsr="1" string="VCVTTSS2SI (R32, XMM)" summary="Convert with Truncation Scalar Single-Precision Floating-Point Value to Integer" url="uops.info/html-instr/VCVTTSS2SI_R32_XMM.html" url-ref="felixcloutier.com/x86/CVTTSS2SI.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="32" xtype="i32">EAX,ECX,EDX,EBX,ESP,EBP,ESI,EDI,R8D,R9D,R10D,R11D,R12D,R13D,R14D,R15D</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="32" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="5" ports="1*p0+1*p1" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p1" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="5" ports="1*p0+1*p1" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p1" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="5" ports="1*p0+1*p1" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.3"/>
        <IACA TP="0.97" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p1" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.3"/>
        <IACA TP="0.97" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p1" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p015+1*p5" uops="3" version="2.3"/>
        <IACA TP="0.95" TP_ports="1.00" ports="1*p0+1*p015+1*p5" uops="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p01" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p015+1*p5" uops="3" version="2.3"/>
        <IACA TP="0.95" TP_ports="1.00" ports="1*p0+1*p015+1*p5" uops="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p01" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p01" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p01" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p01" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p01" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p01" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p01" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p01" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="8" ports="FP3, ALU0" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="9" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="4" ports="FP3, ALU0" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP23" uops="2">
          <latency cycles="9" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="4" ports="FP2/3, FP4" uops="2"/>
      </architecture>
    </instruction>
    <instruction asm="VCVTTSS2SI" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTTSS2SI" iform="VCVTTSS2SI_GPR64q_MEMd" isa-set="AVX" mxcsr="1" string="VCVTTSS2SI (R64, M32)" summary="Convert with Truncation Scalar Single-Precision Floating-Point Value to Integer" url="uops.info/html-instr/VCVTTSS2SI_R64_M32.html" url-ref="felixcloutier.com/x86/CVTTSS2SI.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="64" xtype="i64">RAX,RCX,RDX,RBX,RSP,RBP,RSI,RDI,R8,R9,R10,R11,R12,R13,R14,R15</operand>
      <operand idx="2" memory-prefix="dword ptr" name="MEM0" r="1" type="mem" width="32" xtype="f32"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p1+1*p23" ports_indexed="1*p0+1*p1+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p015+1*p23" ports_indexed="1*p0+1*p015+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="0.93" TP_indexed="0.90" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p015+1*p23" ports_indexed="1*p0+1*p015+1*p23" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p015+1*p23" ports_indexed="1*p0+1*p015+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="0.93" TP_indexed="0.90" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p015+1*p23" ports_indexed="1*p0+1*p015+1*p23" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p01+1*p23" ports_indexed="1*p0+1*p01+1*p23" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="13" cycles_addr_index="13" cycles_mem="15" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="15" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP23" uops="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_mem="13" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VCVTTSS2SI" category="CONVERT" cpl="3" extension="AVX" iclass="VCVTTSS2SI" iform="VCVTTSS2SI_GPR64q_XMMd" isa-set="AVX" mxcsr="1" string="VCVTTSS2SI (R64, XMM)" summary="Convert with Truncation Scalar Single-Precision Floating-Point Value to Integer" url="uops.info/html-instr/VCVTTSS2SI_R64_XMM.html" url-ref="felixcloutier.com/x86/CVTTSS2SI.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="64" xtype="i64">RAX,RCX,RDX,RBX,RSP,RBP,RSI,RDI,R8,R9,R10,R11,R12,R13,R14,R15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="32" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="5" ports="1*p0+1*p1" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p1" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="5" ports="1*p0+1*p1" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p1" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="5" ports="1*p0+1*p1" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.3"/>
        <IACA TP="0.97" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p1" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="2.3"/>
        <IACA TP="0.97" TP_ports="1.00" ports="1*p0+1*p1" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p1" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p015+1*p5" uops="3" version="2.3"/>
        <IACA TP="0.95" TP_ports="1.00" ports="1*p0+1*p015+1*p5" uops="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p01+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="8" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p015+1*p5" uops="3" version="2.3"/>
        <IACA TP="0.95" TP_ports="1.00" ports="1*p0+1*p015+1*p5" uops="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p01+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="8" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p01+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="8" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p01+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="8" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p01+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="8" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p01+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="8" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p01+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="8" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p01+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="8" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p01+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="8" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="8" ports="FP3, ALU0" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="9" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="4" ports="FP3, ALU0" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP23" uops="2">
          <latency cycles="9" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="4" ports="FP2/3, FP4" uops="2"/>
      </architecture>
    </instruction>
    <instruction asm="VDIVPD" category="AVX" cpl="3" extension="AVX" iclass="VDIVPD" iform="VDIVPD_XMMdq_XMMdq_MEMdq" isa-set="AVX" mxcsr="1" string="VDIVPD (XMM, XMM, M128)" summary="Divide Packed Double-Precision Floating-Point Values" url="uops.info/html-instr/VDIVPD_XMM_XMM_M128.html" url-ref="felixcloutier.com/x86/DIVPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f64"/>
      <architecture name="SNB">
        <IACA TP="21.00" TP_no_interiteration="21.00" TP_ports="1.00" div_cycles="21" fusion_occurred="1" latency="27" ports="1*p0+1*p23" uops="2" version="2.1"/>
        <IACA TP="21.00" TP_no_interiteration="21.00" TP_ports="1.00" div_cycles="21" fusion_occurred="1" ports="1*p0+1*p23" uops="2" version="2.2"/>
        <IACA TP="21.00" TP_ports="1.00" div_cycles="21" fusion_occurred="1" ports="1*p0+1*p23" uops="2" version="2.3"/>
        <measurement TP_loop="10.00" TP_ports="1.00" TP_unrolled="10.00" div_cycles="3" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="22" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="28" max_cycles_addr_index="28" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="16" min_cycles_addr_index="16" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="14.00" TP_no_interiteration="14.00" TP_ports="1.00" div_cycles="14" fusion_occurred="1" latency="26" ports="1*p0+1*p23" uops="2" version="2.1"/>
        <IACA TP="14.00" TP_no_interiteration="14.00" TP_ports="1.00" div_cycles="14" fusion_occurred="1" ports="1*p0+1*p23" uops="2" version="2.2"/>
        <IACA TP="14.00" TP_ports="1.00" div_cycles="14" fusion_occurred="1" ports="1*p0+1*p23" uops="2" version="2.3"/>
        <measurement TP_loop="8.00" TP_ports="1.00" TP_unrolled="8.00" div_cycles="11" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="20" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="26" max_cycles_addr_index="26" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="16" min_cycles_addr_index="16" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="14.00" TP_no_interiteration="14.00" TP_ports="1.00" div_cycles="14" fusion_occurred="1" latency="26" ports="1*p0+1*p23" uops="2" version="2.1"/>
        <IACA TP="14.00" TP_no_interiteration="14.00" TP_ports="1.00" div_cycles="14" fusion_occurred="1" ports="1*p0+1*p23" uops="2" version="2.2"/>
        <IACA TP="14.00" TP_ports="1.00" div_cycles="14" fusion_occurred="1" ports="1*p0+1*p23" uops="2" version="2.3"/>
        <IACA TP="14.00" TP_ports="1.00" div_cycles="14" fusion_occurred="1" ports="1*p0+1*p23" uops="2" version="3.0"/>
        <measurement TP_loop="8.00" TP_ports="1.00" TP_unrolled="8.00" div_cycles="9" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="20" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="26" max_cycles_addr_index="26" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="16" min_cycles_addr_index="16" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="8.00" TP_no_interiteration="8.00" TP_ports="1.00" div_cycles="8" fusion_occurred="1" ports="1*p0+1*p23" uops="2" version="2.2"/>
        <IACA TP="8.00" TP_ports="1.00" div_cycles="8" fusion_occurred="1" ports="1*p0+1*p23" uops="2" version="2.3"/>
        <IACA TP="8.00" TP_ports="1.00" div_cycles="8" fusion_occurred="1" ports="1*p0+1*p23" uops="2" version="3.0"/>
        <measurement TP_loop="8.00" TP_ports="1.00" TP_unrolled="8.00" div_cycles="9" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="14" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="20" max_cycles_addr_index="20" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="16" min_cycles_addr_index="16" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="4.00" TP_ports="1.00" div_cycles="4" fusion_occurred="1" ports="1*p0+1*p23" uops="2" version="2.3"/>
        <IACA TP="3.99" TP_ports="1.00" div_cycles="4" fusion_occurred="1" ports="1*p0+1*p23" uops="2" version="3.0"/>
        <measurement TP_loop="4.00" TP_ports="1.00" TP_unrolled="4.00" div_cycles="7" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="21" max_cycles_addr_index="21" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="20" min_cycles_addr_index="20" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="4.00" TP_ports="1.00" div_cycles="4" fusion_occurred="1" ports="1*p0+1*p23" uops="2" version="2.3"/>
        <IACA TP="3.99" TP_ports="1.00" div_cycles="4" fusion_occurred="1" ports="1*p0+1*p23" uops="2" version="3.0"/>
        <measurement TP_loop="4.00" TP_ports="1.00" TP_unrolled="4.00" div_cycles="7" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="21" max_cycles_addr_index="21" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="20" min_cycles_addr_index="20" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="4.00" TP_ports="1.00" TP_unrolled="4.00" div_cycles="7" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="21" max_cycles_addr_index="21" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="20" min_cycles_addr_index="20" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="4.00" TP_ports="1.00" TP_unrolled="4.00" div_cycles="7" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="21" max_cycles_addr_index="21" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="20" min_cycles_addr_index="20" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="4.00" TP_ports="1.00" TP_unrolled="4.00" div_cycles="7" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="21" max_cycles_addr_index="21" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="20" min_cycles_addr_index="20" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="4.00" TP_ports="1.00" TP_unrolled="4.00" div_cycles="7" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="21" max_cycles_addr_index="21" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="20" min_cycles_addr_index="20" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="4.00" TP_ports="1.00" TP_unrolled="4.00" div_cycles="7" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="21" max_cycles_addr_index="21" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="20" min_cycles_addr_index="20" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="4.00" TP_ports="1.00" TP_unrolled="4.00" div_cycles="7" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="21" max_cycles_addr_index="21" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="20" min_cycles_addr_index="20" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="4.00" TP_ports="1.00" TP_unrolled="4.00" div_cycles="7" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="21" max_cycles_addr_index="21" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="20" min_cycles_addr_index="20" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="4.00" TP_ports="1.00" TP_unrolled="4.00" ports="1*FP3" uops="1">
          <latency max_cycles="13" max_cycles_is_upper_bound="1" min_cycles="8" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="21" max_cycles_addr_index="21" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="16" min_cycles_addr_index="16" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="5.00" TP_ports="1.00" TP_unrolled="5.00" ports="1*FP3" uops="1">
          <latency cycles="13" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="21" cycles_addr_index="21" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="4.50" TP_ports="0.50" TP_unrolled="4.50" ports="1*FP01" uops="1">
          <latency cycles="13" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="21" cycles_addr_index="21" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VDIVPD" category="AVX" cpl="3" extension="AVX" iclass="VDIVPD" iform="VDIVPD_XMMdq_XMMdq_XMMdq" isa-set="AVX" mxcsr="1" string="VDIVPD (XMM, XMM, XMM)" summary="Divide Packed Double-Precision Floating-Point Values" url="uops.info/html-instr/VDIVPD_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/DIVPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="21.00" TP_no_interiteration="21.00" TP_ports="1.00" div_cycles="21" latency="21" ports="1*p0" uops="1" version="2.1"/>
        <IACA TP="21.00" TP_no_interiteration="21.00" TP_ports="1.00" div_cycles="21" ports="1*p0" uops="1" version="2.2"/>
        <IACA TP="21.00" TP_ports="1.00" div_cycles="21" ports="1*p0" uops="1" version="2.3"/>
        <measurement TP_loop="10.00" TP_ports="1.00" TP_unrolled="10.00" div_cycles="3" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="22" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="22" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="14.00" TP_no_interiteration="14.00" TP_ports="1.00" div_cycles="14" latency="20" ports="1*p0" uops="1" version="2.1"/>
        <IACA TP="14.00" TP_no_interiteration="14.00" TP_ports="1.00" div_cycles="14" ports="1*p0" uops="1" version="2.2"/>
        <IACA TP="14.00" TP_ports="1.00" div_cycles="14" ports="1*p0" uops="1" version="2.3"/>
        <measurement TP_loop="8.00" TP_ports="1.00" TP_unrolled="8.00" div_cycles="11" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="20" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="20" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="14.00" TP_no_interiteration="14.00" TP_ports="1.00" div_cycles="14" latency="20" ports="1*p0" uops="1" version="2.1"/>
        <IACA TP="14.00" TP_no_interiteration="14.00" TP_ports="1.00" div_cycles="14" ports="1*p0" uops="1" version="2.2"/>
        <IACA TP="14.00" TP_ports="1.00" div_cycles="14" ports="1*p0" uops="1" version="2.3"/>
        <IACA TP="13.81" TP_ports="1.00" div_cycles="14" ports="1*p0" uops="1" version="3.0"/>
        <measurement TP_loop="8.02" TP_ports="1.00" TP_unrolled="8.00" div_cycles="9" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="20" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="20" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="8.00" TP_no_interiteration="8.00" TP_ports="1.00" div_cycles="8" ports="1*p0" uops="1" version="2.2"/>
        <IACA TP="8.00" TP_ports="1.00" div_cycles="8" ports="1*p0" uops="1" version="2.3"/>
        <IACA TP="7.88" TP_ports="1.00" div_cycles="8" ports="1*p0" uops="1" version="3.0"/>
        <measurement TP_loop="8.00" TP_ports="1.00" TP_unrolled="8.00" div_cycles="9" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="14" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="14" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="4.00" TP_ports="1.00" div_cycles="4" ports="1*p0" uops="1" version="2.3"/>
        <IACA TP="3.95" TP_ports="1.00" div_cycles="4" ports="1*p0" uops="1" version="3.0"/>
        <measurement TP_loop="4.00" TP_ports="1.00" TP_unrolled="4.00" div_cycles="7" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="4.00" TP_ports="1.00" div_cycles="4" ports="1*p0" uops="1" version="2.3"/>
        <IACA TP="3.95" TP_ports="1.00" div_cycles="4" ports="1*p0" uops="1" version="3.0"/>
        <measurement TP_loop="4.00" TP_ports="1.00" TP_unrolled="4.00" div_cycles="7" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="4.00" TP_ports="1.00" TP_unrolled="4.00" div_cycles="7" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="4.00" TP_ports="1.00" TP_unrolled="4.00" div_cycles="7" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="4.00" TP_ports="1.00" TP_unrolled="4.00" div_cycles="7" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="4.00" TP_ports="1.00" TP_unrolled="4.00" div_cycles="7" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="4.00" TP_ports="1.00" TP_unrolled="4.00" div_cycles="7" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="4.00" TP_ports="1.00" TP_unrolled="4.00" div_cycles="7" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="4.00" TP_ports="1.00" TP_unrolled="4.00" div_cycles="7" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="4.00" TP_ports="1.00" TP_unrolled="4.00" ports="1*FP3" uops="1">
          <latency max_cycles="13" max_cycles_is_upper_bound="1" min_cycles="8" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="13" max_cycles_is_upper_bound="1" min_cycles="8" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="4.50" latency="13" ports="FP3" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="5.00" TP_ports="1.00" TP_unrolled="5.00" ports="1*FP3" uops="1">
          <latency cycles="13" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="13" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="6.50" latency="13" ports="FP3" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="4.50" TP_ports="0.50" TP_unrolled="4.50" ports="1*FP01" uops="1">
          <latency cycles="13" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="13" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="6.50" latency="13" ports="FP3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VDIVPD" category="AVX" cpl="3" extension="AVX" iclass="VDIVPD" iform="VDIVPD_YMMqq_YMMqq_MEMqq" isa-set="AVX" mxcsr="1" string="VDIVPD (YMM, YMM, M256)" summary="Divide Packed Double-Precision Floating-Point Values" url="uops.info/html-instr/VDIVPD_YMM_YMM_M256.html" url-ref="felixcloutier.com/x86/DIVPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f64"/>
      <architecture name="SNB">
        <IACA TP="42.00" TP_no_interiteration="42.00" TP_ports="2.00" div_cycles="42" fusion_occurred="1" latency="69" ports="2*p0+1*p05+1*p23" uops="4" version="2.1"/>
        <IACA TP="42.00" TP_no_interiteration="42.00" TP_ports="2.00" div_cycles="42" fusion_occurred="1" ports="2*p0+1*p05+1*p23" uops="4" version="2.2"/>
        <IACA TP="42.00" TP_ports="2.00" div_cycles="42" fusion_occurred="1" ports="2*p0+1*p05+1*p23" uops="4" version="2.3"/>
        <measurement TP_loop="20.16" TP_ports="2.00" TP_unrolled="20.06" available_simple_decoders="0" complex_decoder="1" div_cycles="6" ports="2*p0+1*p05+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency max_cycles="45" max_cycles_is_upper_bound="1" min_cycles="21" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="53" max_cycles_addr_index="53" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="29" min_cycles_addr_index="29" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="28.00" TP_no_interiteration="28.00" TP_ports="2.00" div_cycles="28" fusion_occurred="1" latency="54" ports="2*p0+1*p05+1*p23" uops="4" version="2.1"/>
        <IACA TP="28.00" TP_no_interiteration="28.00" TP_ports="2.00" div_cycles="28" fusion_occurred="1" ports="2*p0+1*p05+1*p23" uops="4" version="2.2"/>
        <IACA TP="28.00" TP_ports="2.00" div_cycles="28" fusion_occurred="1" ports="2*p0+1*p05+1*p23" uops="4" version="2.3"/>
        <measurement TP_loop="16.15" TP_ports="2.00" TP_unrolled="16.00" available_simple_decoders="0" complex_decoder="1" div_cycles="19" ports="2*p0+1*p05+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency max_cycles="35" max_cycles_is_upper_bound="1" min_cycles="19" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="43" max_cycles_addr_index="43" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="27" min_cycles_addr_index="27" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="28.00" TP_no_interiteration="28.00" TP_ports="2.00" div_cycles="28" fusion_occurred="1" latency="54" ports="2*p0+1*p015+1*p23" uops="4" version="2.1"/>
        <IACA TP="28.00" TP_no_interiteration="28.00" TP_ports="2.00" div_cycles="28" fusion_occurred="1" ports="2*p0+1*p015+1*p23" uops="4" version="2.2"/>
        <IACA TP="28.00" TP_ports="2.00" div_cycles="28" fusion_occurred="1" ports="2*p0+1*p015+1*p23" uops="4" version="2.3"/>
        <IACA TP="28.00" TP_ports="2.00" div_cycles="28" fusion_occurred="1" ports="2*p0+1*p01+1*p23" uops="4" version="3.0"/>
        <measurement TP_loop="16.25" TP_ports="2.00" TP_unrolled="16.31" available_simple_decoders="0" complex_decoder="1" div_cycles="9" ports="2*p0+1*p015+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency max_cycles="35" max_cycles_is_upper_bound="1" min_cycles="19" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="42" max_cycles_addr_index="42" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="26" min_cycles_addr_index="26" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="16.00" TP_no_interiteration="16.00" TP_ports="2.00" div_cycles="16" fusion_occurred="1" ports="2*p0+1*p015+1*p23" uops="4" version="2.2"/>
        <IACA TP="16.00" TP_ports="2.00" div_cycles="16" fusion_occurred="1" ports="2*p0+1*p015+1*p23" uops="4" version="2.3"/>
        <IACA TP="16.00" TP_ports="2.00" div_cycles="16" fusion_occurred="1" ports="2*p0+1*p01+1*p23" uops="4" version="3.0"/>
        <measurement TP_loop="16.25" TP_ports="2.00" TP_unrolled="16.33" available_simple_decoders="0" complex_decoder="1" div_cycles="17" ports="2*p0+1*p015+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency max_cycles="23" max_cycles_is_upper_bound="1" min_cycles="19" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="30" max_cycles_addr_index="30" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="26" min_cycles_addr_index="26" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="8.00" TP_ports="1.00" div_cycles="8" fusion_occurred="1" ports="1*p0+1*p23" uops="2" version="2.3"/>
        <IACA TP="8.00" TP_ports="1.00" div_cycles="8" fusion_occurred="1" ports="1*p0+1*p23" uops="2" version="3.0"/>
        <measurement TP_loop="8.00" TP_ports="1.00" TP_unrolled="8.00" div_cycles="14" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="22" max_cycles_addr_index="22" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="21" min_cycles_addr_index="21" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="8.00" TP_ports="1.00" div_cycles="8" fusion_occurred="1" ports="1*p0+1*p23" uops="2" version="2.3"/>
        <IACA TP="8.00" TP_ports="1.00" div_cycles="8" fusion_occurred="1" ports="1*p0+1*p23" uops="2" version="3.0"/>
        <measurement TP_loop="8.00" TP_ports="1.00" TP_unrolled="8.00" div_cycles="14" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="22" max_cycles_addr_index="22" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="21" min_cycles_addr_index="21" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="8.00" TP_ports="1.00" TP_unrolled="8.00" div_cycles="14" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="22" max_cycles_addr_index="22" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="21" min_cycles_addr_index="21" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="8.00" TP_ports="1.00" TP_unrolled="8.00" div_cycles="14" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="22" max_cycles_addr_index="22" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="21" min_cycles_addr_index="21" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="8.00" TP_ports="1.00" TP_unrolled="8.00" div_cycles="14" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="22" max_cycles_addr_index="22" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="21" min_cycles_addr_index="21" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="8.00" TP_ports="1.00" TP_unrolled="8.00" div_cycles="14" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="22" max_cycles_addr_index="22" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="21" min_cycles_addr_index="21" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="8.00" TP_ports="1.00" TP_unrolled="8.00" div_cycles="14" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="22" max_cycles_addr_index="22" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="21" min_cycles_addr_index="21" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="8.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="8.00" TP_ports="1.00" TP_unrolled="8.00" div_cycles="14" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="22" max_cycles_addr_index="22" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="21" min_cycles_addr_index="21" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="8.00" TP_ports="1.00" TP_unrolled="8.00" div_cycles="14" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="22" max_cycles_addr_index="22" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="21" min_cycles_addr_index="21" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="8.00" TP_unrolled="8.00" uops="2">
          <latency max_cycles="13" max_cycles_is_upper_bound="1" min_cycles="8" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="21" max_cycles_addr_index="21" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="16" min_cycles_addr_index="16" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="5.00" TP_ports="1.00" TP_unrolled="5.00" ports="1*FP3" uops="1">
          <latency cycles="13" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="21" cycles_addr_index="21" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="4.50" TP_ports="0.50" TP_unrolled="4.50" ports="1*FP01" uops="1">
          <latency cycles="13" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="21" cycles_addr_index="21" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VDIVPD" category="AVX" cpl="3" extension="AVX" iclass="VDIVPD" iform="VDIVPD_YMMqq_YMMqq_YMMqq" isa-set="AVX" mxcsr="1" string="VDIVPD (YMM, YMM, YMM)" summary="Divide Packed Double-Precision Floating-Point Values" url="uops.info/html-instr/VDIVPD_YMM_YMM_YMM.html" url-ref="felixcloutier.com/x86/DIVPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA TP="42.00" TP_no_interiteration="42.00" TP_ports="2.00" div_cycles="42" latency="62" ports="2*p0+1*p05" uops="3" version="2.1"/>
        <IACA TP="42.00" TP_no_interiteration="42.00" TP_ports="2.00" div_cycles="42" ports="2*p0+1*p05" uops="3" version="2.2"/>
        <IACA TP="42.00" TP_ports="2.00" div_cycles="42" ports="2*p0+1*p05" uops="3" version="2.3"/>
        <measurement TP_loop="20.00" TP_ports="2.00" TP_unrolled="20.00" available_simple_decoders="0" complex_decoder="1" div_cycles="6" ports="2*p0+1*p05" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency max_cycles="45" max_cycles_is_upper_bound="1" min_cycles="21" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="45" max_cycles_is_upper_bound="1" min_cycles="21" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="28.00" TP_no_interiteration="28.00" TP_ports="2.00" div_cycles="28" latency="47" ports="2*p0+1*p05" uops="3" version="2.1"/>
        <IACA TP="28.00" TP_no_interiteration="28.00" TP_ports="2.00" div_cycles="28" ports="2*p0+1*p05" uops="3" version="2.2"/>
        <IACA TP="28.00" TP_ports="2.00" div_cycles="28" ports="2*p0+1*p05" uops="3" version="2.3"/>
        <measurement TP_loop="16.45" TP_ports="2.00" TP_unrolled="16.48" available_simple_decoders="0" complex_decoder="1" div_cycles="19" ports="2*p0+1*p05" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency max_cycles="35" max_cycles_is_upper_bound="1" min_cycles="19" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="35" max_cycles_is_upper_bound="1" min_cycles="19" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="28.00" TP_no_interiteration="28.00" TP_ports="2.00" div_cycles="28" latency="47" ports="2*p0+1*p015" uops="3" version="2.1"/>
        <IACA TP="28.00" TP_no_interiteration="28.00" TP_ports="2.00" div_cycles="28" ports="2*p0+1*p015" uops="3" version="2.2"/>
        <IACA TP="28.00" TP_ports="2.00" div_cycles="28" ports="2*p0+1*p015" uops="3" version="2.3"/>
        <IACA TP="27.12" TP_ports="2.00" div_cycles="28" ports="2*p0+1*p01" uops="3" version="3.0"/>
        <measurement TP_loop="16.12" TP_ports="2.00" TP_unrolled="16.12" available_simple_decoders="0" complex_decoder="1" div_cycles="9" ports="2*p0+1*p015" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency max_cycles="35" max_cycles_is_upper_bound="1" min_cycles="19" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="35" max_cycles_is_upper_bound="1" min_cycles="19" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="16.00" TP_no_interiteration="16.00" TP_ports="2.00" div_cycles="16" ports="2*p0+1*p015" uops="3" version="2.2"/>
        <IACA TP="16.00" TP_ports="2.00" div_cycles="16" ports="2*p0+1*p015" uops="3" version="2.3"/>
        <IACA TP="15.47" TP_ports="2.00" div_cycles="16" ports="2*p0+1*p01" uops="3" version="3.0"/>
        <measurement TP_loop="16.11" TP_ports="2.00" TP_unrolled="16.00" available_simple_decoders="0" complex_decoder="1" div_cycles="17" ports="2*p0+1*p015" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency max_cycles="23" max_cycles_is_upper_bound="1" min_cycles="19" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="23" max_cycles_is_upper_bound="1" min_cycles="19" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="8.00" TP_ports="1.00" div_cycles="8" ports="1*p0" uops="1" version="2.3"/>
        <IACA TP="7.90" TP_ports="1.00" div_cycles="8" ports="1*p0" uops="1" version="3.0"/>
        <measurement TP_loop="8.00" TP_ports="1.00" TP_unrolled="8.00" div_cycles="14" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="8.00" TP_ports="1.00" div_cycles="8" ports="1*p0" uops="1" version="2.3"/>
        <IACA TP="7.90" TP_ports="1.00" div_cycles="8" ports="1*p0" uops="1" version="3.0"/>
        <measurement TP_loop="8.00" TP_ports="1.00" TP_unrolled="8.00" div_cycles="14" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="8.00" TP_ports="1.00" TP_unrolled="8.00" div_cycles="14" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="8.00" TP_ports="1.00" TP_unrolled="8.00" div_cycles="14" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="8.00" TP_ports="1.00" TP_unrolled="8.00" div_cycles="14" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="16" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="16" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="8.00" TP_ports="1.00" TP_unrolled="8.00" div_cycles="14" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="8.00" TP_ports="1.00" TP_unrolled="8.00" div_cycles="14" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="8.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="8.00" TP_ports="1.00" TP_unrolled="8.00" div_cycles="14" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="8.00" TP_ports="1.00" TP_unrolled="8.00" div_cycles="14" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="8.00" TP_unrolled="8.00" uops="2">
          <latency max_cycles="14" max_cycles_is_upper_bound="1" min_cycles="8" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="13" max_cycles_is_upper_bound="1" min_cycles="8" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="9.00" latency="15" ports="FP3" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="5.00" TP_ports="1.00" TP_unrolled="5.00" ports="1*FP3" uops="1">
          <latency cycles="13" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="13" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="6.50" latency="13" ports="FP3" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="4.50" TP_ports="0.50" TP_unrolled="4.50" ports="1*FP01" uops="1">
          <latency cycles="13" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="13" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="6.50" latency="13" ports="FP3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VDIVPS" category="AVX" cpl="3" extension="AVX" iclass="VDIVPS" iform="VDIVPS_XMMdq_XMMdq_MEMdq" isa-set="AVX" mxcsr="1" string="VDIVPS (XMM, XMM, M128)" summary="Divide Packed Single-Precision Floating-Point Values" url="uops.info/html-instr/VDIVPS_XMM_XMM_M128.html" url-ref="felixcloutier.com/x86/DIVPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f32"/>
      <architecture name="SNB">
        <IACA TP="14.00" TP_no_interiteration="14.00" TP_ports="1.00" div_cycles="14" fusion_occurred="1" latency="20" ports="1*p0+1*p23" uops="2" version="2.1"/>
        <IACA TP="14.00" TP_no_interiteration="14.00" TP_ports="1.00" div_cycles="14" fusion_occurred="1" ports="1*p0+1*p23" uops="2" version="2.2"/>
        <IACA TP="14.00" TP_ports="1.00" div_cycles="14" fusion_occurred="1" ports="1*p0+1*p23" uops="2" version="2.3"/>
        <measurement TP_loop="10.00" TP_ports="1.00" TP_unrolled="10.00" div_cycles="3" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="14" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="20" max_cycles_addr_index="20" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="16" min_cycles_addr_index="16" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="7.00" TP_no_interiteration="7.00" TP_ports="1.00" div_cycles="7" fusion_occurred="1" latency="19" ports="1*p0+1*p23" uops="2" version="2.1"/>
        <IACA TP="7.00" TP_no_interiteration="7.00" TP_ports="1.00" div_cycles="7" fusion_occurred="1" ports="1*p0+1*p23" uops="2" version="2.2"/>
        <IACA TP="7.00" TP_ports="1.00" div_cycles="7" fusion_occurred="1" ports="1*p0+1*p23" uops="2" version="2.3"/>
        <measurement TP_loop="7.00" TP_ports="1.00" TP_unrolled="7.00" div_cycles="11" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="13" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="19" max_cycles_addr_index="19" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="16" min_cycles_addr_index="16" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="7.00" TP_no_interiteration="7.00" TP_ports="1.00" div_cycles="7" fusion_occurred="1" latency="19" ports="1*p0+1*p23" uops="2" version="2.1"/>
        <IACA TP="7.00" TP_no_interiteration="7.00" TP_ports="1.00" div_cycles="7" fusion_occurred="1" ports="1*p0+1*p23" uops="2" version="2.2"/>
        <IACA TP="7.00" TP_ports="1.00" div_cycles="7" fusion_occurred="1" ports="1*p0+1*p23" uops="2" version="2.3"/>
        <IACA TP="7.00" TP_ports="1.00" div_cycles="7" fusion_occurred="1" ports="1*p0+1*p23" uops="2" version="3.0"/>
        <measurement TP_loop="7.00" TP_ports="1.00" TP_unrolled="7.00" div_cycles="9" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="13" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="19" max_cycles_addr_index="19" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="16" min_cycles_addr_index="16" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="5.00" TP_no_interiteration="5.00" TP_ports="1.00" div_cycles="5" fusion_occurred="1" ports="1*p0+1*p23" uops="2" version="2.2"/>
        <IACA TP="5.00" TP_ports="1.00" div_cycles="5" fusion_occurred="1" ports="1*p0+1*p23" uops="2" version="2.3"/>
        <IACA TP="5.00" TP_ports="1.00" div_cycles="5" fusion_occurred="1" ports="1*p0+1*p23" uops="2" version="3.0"/>
        <measurement TP_loop="5.00" TP_ports="1.00" TP_unrolled="5.00" div_cycles="10" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="17" cycles_addr_index="17" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="3.00" TP_ports="1.00" div_cycles="3" fusion_occurred="1" ports="1*p0+1*p23" uops="2" version="2.3"/>
        <IACA TP="2.94" TP_ports="1.00" div_cycles="3" fusion_occurred="1" ports="1*p0+1*p23" uops="2" version="3.0"/>
        <measurement TP_loop="3.00" TP_ports="1.00" TP_unrolled="3.00" div_cycles="5" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="18" cycles_addr_index="18" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="3.00" TP_ports="1.00" div_cycles="3" fusion_occurred="1" ports="1*p0+1*p23" uops="2" version="2.3"/>
        <IACA TP="2.94" TP_ports="1.00" div_cycles="3" fusion_occurred="1" ports="1*p0+1*p23" uops="2" version="3.0"/>
        <measurement TP_loop="3.00" TP_ports="1.00" TP_unrolled="3.00" div_cycles="5" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="18" cycles_addr_index="18" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="3.00" TP_ports="1.00" TP_unrolled="3.00" div_cycles="5" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="18" cycles_addr_index="18" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="3.00" TP_ports="1.00" TP_unrolled="3.00" div_cycles="5" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="18" cycles_addr_index="18" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="3.00" TP_ports="1.00" TP_unrolled="3.00" div_cycles="5" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="18" cycles_addr_index="18" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="3.00" TP_ports="1.00" TP_unrolled="3.00" div_cycles="5" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="18" cycles_addr_index="18" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="3.00" TP_ports="1.00" TP_unrolled="3.00" div_cycles="5" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="18" cycles_addr_index="18" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="3.0" latency="17.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="3.00" TP_ports="1.00" TP_unrolled="3.00" div_cycles="5" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="18" cycles_addr_index="18" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="3.00" TP_ports="1.00" TP_unrolled="3.00" div_cycles="5" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="18" cycles_addr_index="18" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="3.00" TP_ports="1.00" TP_unrolled="3.00" ports="1*FP3" uops="1">
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="18" cycles_addr_index="18" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="3.50" TP_ports="1.00" TP_unrolled="3.50" ports="1*FP3" uops="1">
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="18" cycles_addr_index="18" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="3.50" TP_ports="0.50" TP_unrolled="3.50" ports="1*FP01" uops="1">
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="19" cycles_addr_index="19" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VDIVPS" category="AVX" cpl="3" extension="AVX" iclass="VDIVPS" iform="VDIVPS_XMMdq_XMMdq_XMMdq" isa-set="AVX" mxcsr="1" string="VDIVPS (XMM, XMM, XMM)" summary="Divide Packed Single-Precision Floating-Point Values" url="uops.info/html-instr/VDIVPS_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/DIVPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="14.00" TP_no_interiteration="14.00" TP_ports="1.00" div_cycles="14" latency="14" ports="1*p0" uops="1" version="2.1"/>
        <IACA TP="14.00" TP_no_interiteration="14.00" TP_ports="1.00" div_cycles="14" ports="1*p0" uops="1" version="2.2"/>
        <IACA TP="14.00" TP_ports="1.00" div_cycles="14" ports="1*p0" uops="1" version="2.3"/>
        <measurement TP_loop="10.00" TP_ports="1.00" TP_unrolled="10.00" div_cycles="3" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="14" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="14" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="7.00" TP_no_interiteration="7.00" TP_ports="1.00" div_cycles="7" latency="13" ports="1*p0" uops="1" version="2.1"/>
        <IACA TP="7.00" TP_no_interiteration="7.00" TP_ports="1.00" div_cycles="7" ports="1*p0" uops="1" version="2.2"/>
        <IACA TP="7.00" TP_ports="1.00" div_cycles="7" ports="1*p0" uops="1" version="2.3"/>
        <measurement TP_loop="7.08" TP_ports="1.00" TP_unrolled="7.08" div_cycles="11" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="13" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="13" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="7.00" TP_no_interiteration="7.00" TP_ports="1.00" div_cycles="7" latency="13" ports="1*p0" uops="1" version="2.1"/>
        <IACA TP="7.00" TP_no_interiteration="7.00" TP_ports="1.00" div_cycles="7" ports="1*p0" uops="1" version="2.2"/>
        <IACA TP="7.00" TP_ports="1.00" div_cycles="7" ports="1*p0" uops="1" version="2.3"/>
        <IACA TP="6.90" TP_ports="1.00" div_cycles="7" ports="1*p0" uops="1" version="3.0"/>
        <measurement TP_loop="7.03" TP_ports="1.00" TP_unrolled="7.03" div_cycles="9" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="13" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="13" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="5.00" TP_no_interiteration="5.00" TP_ports="1.00" div_cycles="5" ports="1*p0" uops="1" version="2.2"/>
        <IACA TP="5.00" TP_ports="1.00" div_cycles="5" ports="1*p0" uops="1" version="2.3"/>
        <IACA TP="4.92" TP_ports="1.00" div_cycles="5" ports="1*p0" uops="1" version="3.0"/>
        <measurement TP_loop="5.00" TP_ports="1.00" TP_unrolled="5.00" div_cycles="10" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="3.00" TP_ports="1.00" div_cycles="3" ports="1*p0" uops="1" version="2.3"/>
        <IACA TP="2.96" TP_ports="1.00" div_cycles="3" ports="1*p0" uops="1" version="3.0"/>
        <measurement TP_loop="3.00" TP_ports="1.00" TP_unrolled="3.00" div_cycles="5" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="3.00" TP_ports="1.00" div_cycles="3" ports="1*p0" uops="1" version="2.3"/>
        <IACA TP="2.96" TP_ports="1.00" div_cycles="3" ports="1*p0" uops="1" version="3.0"/>
        <measurement TP_loop="3.00" TP_ports="1.00" TP_unrolled="3.00" div_cycles="5" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="3.00" TP_ports="1.00" TP_unrolled="3.00" div_cycles="5" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="3.00" TP_ports="1.00" TP_unrolled="3.00" div_cycles="5" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="3.00" TP_ports="1.00" TP_unrolled="3.00" div_cycles="5" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="3.00" TP_ports="1.00" TP_unrolled="3.00" div_cycles="5" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="3.00" TP_ports="1.00" TP_unrolled="3.00" div_cycles="5" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="3.0" latency="11.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="3.00" TP_ports="1.00" TP_unrolled="3.00" div_cycles="5" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="3.00" TP_ports="1.00" TP_unrolled="3.00" div_cycles="5" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="3.00" TP_ports="1.00" TP_unrolled="3.00" ports="1*FP3" uops="1">
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="3.00" latency="10" ports="FP3" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="3.50" TP_ports="1.00" TP_unrolled="3.50" ports="1*FP3" uops="1">
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="5.00" latency="10" ports="FP3" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="3.50" TP_ports="0.50" TP_unrolled="3.50" ports="1*FP01" uops="1">
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="5.00" latency="10" ports="FP3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VDIVPS" category="AVX" cpl="3" extension="AVX" iclass="VDIVPS" iform="VDIVPS_YMMqq_YMMqq_MEMqq" isa-set="AVX" mxcsr="1" string="VDIVPS (YMM, YMM, M256)" summary="Divide Packed Single-Precision Floating-Point Values" url="uops.info/html-instr/VDIVPS_YMM_YMM_M256.html" url-ref="felixcloutier.com/x86/DIVPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f32"/>
      <architecture name="SNB">
        <IACA TP="28.00" TP_no_interiteration="28.00" TP_ports="2.00" div_cycles="28" fusion_occurred="1" latency="48" ports="2*p0+1*p05+1*p23" uops="4" version="2.1"/>
        <IACA TP="28.00" TP_no_interiteration="28.00" TP_ports="2.00" div_cycles="28" fusion_occurred="1" ports="2*p0+1*p05+1*p23" uops="4" version="2.2"/>
        <IACA TP="28.00" TP_ports="2.00" div_cycles="28" fusion_occurred="1" ports="2*p0+1*p05+1*p23" uops="4" version="2.3"/>
        <measurement TP_loop="20.16" TP_ports="2.00" TP_unrolled="20.06" available_simple_decoders="0" complex_decoder="1" div_cycles="6" ports="2*p0+1*p05+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency max_cycles="29" max_cycles_is_upper_bound="1" min_cycles="21" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="37" max_cycles_addr_index="37" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="29" min_cycles_addr_index="29" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="14.00" TP_no_interiteration="14.00" TP_ports="2.00" div_cycles="14" fusion_occurred="1" latency="33" ports="2*p0+1*p23" uops="3" version="2.1"/>
        <IACA TP="14.00" TP_no_interiteration="14.00" TP_ports="2.00" div_cycles="14" fusion_occurred="1" ports="2*p0+1*p23" uops="3" version="2.2"/>
        <IACA TP="14.00" TP_ports="2.00" div_cycles="14" fusion_occurred="1" ports="2*p0+1*p23" uops="3" version="2.3"/>
        <measurement TP_loop="14.06" TP_ports="2.00" TP_unrolled="14.00" available_simple_decoders="0" complex_decoder="1" div_cycles="18" ports="2*p0+1*p05+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency max_cycles="21" max_cycles_is_upper_bound="1" min_cycles="18" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="29" max_cycles_addr_index="29" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="26" min_cycles_addr_index="26" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="14.00" TP_no_interiteration="14.00" TP_ports="2.00" div_cycles="14" fusion_occurred="1" latency="33" ports="2*p0+1*p015+1*p23" uops="4" version="2.1"/>
        <IACA TP="14.00" TP_no_interiteration="14.00" TP_ports="2.00" div_cycles="14" fusion_occurred="1" ports="2*p0+1*p015+1*p23" uops="4" version="2.2"/>
        <IACA TP="14.00" TP_ports="2.00" div_cycles="14" fusion_occurred="1" ports="2*p0+1*p015+1*p23" uops="4" version="2.3"/>
        <IACA TP="14.00" TP_ports="2.00" div_cycles="14" fusion_occurred="1" ports="2*p0+1*p01+1*p23" uops="4" version="3.0"/>
        <measurement TP_loop="14.14" TP_ports="2.00" TP_unrolled="14.00" available_simple_decoders="0" complex_decoder="1" div_cycles="9" ports="2*p0+1*p015+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency max_cycles="21" max_cycles_is_upper_bound="1" min_cycles="18" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="28" max_cycles_addr_index="28" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="25" min_cycles_addr_index="25" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="10.00" TP_no_interiteration="10.00" TP_ports="2.00" div_cycles="10" fusion_occurred="1" ports="2*p0+1*p015+1*p23" uops="4" version="2.2"/>
        <IACA TP="10.00" TP_ports="2.00" div_cycles="10" fusion_occurred="1" ports="2*p0+1*p015+1*p23" uops="4" version="2.3"/>
        <IACA TP="10.00" TP_ports="2.00" div_cycles="10" fusion_occurred="1" ports="2*p0+1*p01+1*p23" uops="4" version="3.0"/>
        <measurement TP_loop="10.45" TP_ports="2.00" TP_unrolled="10.35" available_simple_decoders="0" complex_decoder="1" div_cycles="15" ports="2*p0+1*p015+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="17" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="24" cycles_addr_index="24" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="5.00" TP_ports="1.00" div_cycles="5" fusion_occurred="1" ports="1*p0+1*p23" uops="2" version="2.3"/>
        <IACA TP="5.00" TP_ports="1.00" div_cycles="5" fusion_occurred="1" ports="1*p0+1*p23" uops="2" version="3.0"/>
        <measurement TP_loop="5.00" TP_ports="1.00" TP_unrolled="5.00" div_cycles="10" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="19" cycles_addr_index="19" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="5.00" TP_ports="1.00" div_cycles="5" fusion_occurred="1" ports="1*p0+1*p23" uops="2" version="2.3"/>
        <IACA TP="5.00" TP_ports="1.00" div_cycles="5" fusion_occurred="1" ports="1*p0+1*p23" uops="2" version="3.0"/>
        <measurement TP_loop="5.00" TP_ports="1.00" TP_unrolled="5.00" div_cycles="10" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="19" cycles_addr_index="19" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="5.00" TP_ports="1.00" TP_unrolled="5.00" div_cycles="10" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="19" cycles_addr_index="19" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="5.00" TP_ports="1.00" TP_unrolled="5.00" div_cycles="10" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="19" cycles_addr_index="19" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="5.00" TP_ports="1.00" TP_unrolled="5.00" div_cycles="10" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="19" cycles_addr_index="19" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="5.00" TP_ports="1.00" TP_unrolled="5.00" div_cycles="10" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="19" cycles_addr_index="19" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="5.00" TP_ports="1.00" TP_unrolled="5.00" div_cycles="10" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="19" cycles_addr_index="19" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="5.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="5.00" TP_ports="1.00" TP_unrolled="5.00" div_cycles="10" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="19" cycles_addr_index="19" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="5.00" TP_ports="1.00" TP_unrolled="5.00" div_cycles="10" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="19" cycles_addr_index="19" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="6.00" TP_unrolled="6.00" uops="2">
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="18" cycles_addr_index="18" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="3.50" TP_ports="1.00" TP_unrolled="3.50" ports="1*FP3" uops="1">
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="18" cycles_addr_index="18" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="3.50" TP_ports="0.50" TP_unrolled="3.50" ports="1*FP01" uops="1">
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="19" cycles_addr_index="19" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VDIVPS" category="AVX" cpl="3" extension="AVX" iclass="VDIVPS" iform="VDIVPS_YMMqq_YMMqq_YMMqq" isa-set="AVX" mxcsr="1" string="VDIVPS (YMM, YMM, YMM)" summary="Divide Packed Single-Precision Floating-Point Values" url="uops.info/html-instr/VDIVPS_YMM_YMM_YMM.html" url-ref="felixcloutier.com/x86/DIVPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA TP="28.00" TP_no_interiteration="28.00" TP_ports="2.00" div_cycles="28" latency="41" ports="2*p0+1*p05" uops="3" version="2.1"/>
        <IACA TP="28.00" TP_no_interiteration="28.00" TP_ports="2.00" div_cycles="28" ports="2*p0+1*p05" uops="3" version="2.2"/>
        <IACA TP="28.00" TP_ports="2.00" div_cycles="28" ports="2*p0+1*p05" uops="3" version="2.3"/>
        <measurement TP_loop="20.00" TP_ports="2.00" TP_unrolled="20.00" available_simple_decoders="0" complex_decoder="1" div_cycles="6" ports="2*p0+1*p05" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency max_cycles="29" max_cycles_is_upper_bound="1" min_cycles="21" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="29" max_cycles_is_upper_bound="1" min_cycles="21" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="14.00" TP_no_interiteration="14.00" TP_ports="2.00" div_cycles="14" latency="26" ports="2*p0" uops="2" version="2.1"/>
        <IACA TP="14.00" TP_no_interiteration="14.00" TP_ports="2.00" div_cycles="14" ports="2*p0" uops="2" version="2.2"/>
        <IACA TP="14.00" TP_ports="2.00" div_cycles="14" ports="2*p0" uops="2" version="2.3"/>
        <measurement TP_loop="14.06" TP_ports="2.00" TP_unrolled="14.15" available_simple_decoders="0" complex_decoder="1" div_cycles="18" ports="2*p0+1*p05" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency max_cycles="21" max_cycles_is_upper_bound="1" min_cycles="18" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="21" max_cycles_is_upper_bound="1" min_cycles="18" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="14.00" TP_no_interiteration="14.00" TP_ports="2.00" div_cycles="14" latency="26" ports="2*p0+1*p015" uops="3" version="2.1"/>
        <IACA TP="14.00" TP_no_interiteration="14.00" TP_ports="2.00" div_cycles="14" ports="2*p0+1*p015" uops="3" version="2.2"/>
        <IACA TP="14.00" TP_ports="2.00" div_cycles="14" ports="2*p0+1*p015" uops="3" version="2.3"/>
        <IACA TP="13.56" TP_ports="2.00" div_cycles="14" ports="2*p0+1*p01" uops="3" version="3.0"/>
        <measurement TP_loop="14.00" TP_ports="2.00" TP_unrolled="14.09" available_simple_decoders="0" complex_decoder="1" div_cycles="9" ports="2*p0+1*p015" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency max_cycles="21" max_cycles_is_upper_bound="1" min_cycles="18" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="21" max_cycles_is_upper_bound="1" min_cycles="18" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="10.00" TP_no_interiteration="10.00" TP_ports="2.00" div_cycles="10" ports="2*p0+1*p015" uops="3" version="2.2"/>
        <IACA TP="10.00" TP_ports="2.00" div_cycles="10" ports="2*p0+1*p015" uops="3" version="2.3"/>
        <IACA TP="9.67" TP_ports="2.00" div_cycles="10" ports="2*p0+1*p01" uops="3" version="3.0"/>
        <measurement TP_loop="10.06" TP_ports="2.00" TP_unrolled="10.00" available_simple_decoders="0" complex_decoder="1" div_cycles="15" ports="2*p0+1*p015" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="17" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="17" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="5.00" TP_ports="1.00" div_cycles="5" ports="1*p0" uops="1" version="2.3"/>
        <IACA TP="4.94" TP_ports="1.00" div_cycles="5" ports="1*p0" uops="1" version="3.0"/>
        <measurement TP_loop="5.00" TP_ports="1.00" TP_unrolled="5.00" div_cycles="10" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="5.00" TP_ports="1.00" div_cycles="5" ports="1*p0" uops="1" version="2.3"/>
        <IACA TP="4.94" TP_ports="1.00" div_cycles="5" ports="1*p0" uops="1" version="3.0"/>
        <measurement TP_loop="5.00" TP_ports="1.00" TP_unrolled="5.00" div_cycles="10" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="5.00" TP_ports="1.00" TP_unrolled="5.00" div_cycles="10" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="5.00" TP_ports="1.00" TP_unrolled="5.00" div_cycles="10" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="5.00" TP_ports="1.00" TP_unrolled="5.00" div_cycles="10" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="5.00" TP_ports="1.00" TP_unrolled="5.00" div_cycles="10" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="5.00" TP_ports="1.00" TP_unrolled="5.00" div_cycles="10" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="5.0" latency="11.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="5.00" TP_ports="1.00" TP_unrolled="5.00" div_cycles="10" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="5.00" TP_ports="1.00" TP_unrolled="5.00" div_cycles="10" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="6.00" TP_unrolled="6.00" uops="2">
          <latency max_cycles="11" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="11" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="6.00" latency="12" ports="FP3" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="3.50" TP_ports="1.00" TP_unrolled="3.50" ports="1*FP3" uops="1">
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="5.00" latency="10" ports="FP3" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="3.50" TP_ports="0.50" TP_unrolled="3.50" ports="1*FP01" uops="1">
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="5.00" latency="10" ports="FP3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VDIVSD" category="AVX" cpl="3" extension="AVX" iclass="VDIVSD" iform="VDIVSD_XMMdq_XMMdq_MEMq" isa-set="AVX" mxcsr="1" string="VDIVSD (XMM, XMM, M64)" summary="Divide Scalar Double-Precision Floating-Point Value" url="uops.info/html-instr/VDIVSD_XMM_XMM_M64.html" url-ref="felixcloutier.com/x86/DIVSD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="f64"/>
      <architecture name="SNB">
        <IACA TP="21.00" TP_no_interiteration="21.00" TP_ports="1.00" div_cycles="21" fusion_occurred="1" latency="27" ports="1*p0+1*p23" uops="2" version="2.1"/>
        <IACA TP="21.00" TP_no_interiteration="21.00" TP_ports="1.00" div_cycles="21" fusion_occurred="1" ports="1*p0+1*p23" uops="2" version="2.2"/>
        <IACA TP="21.00" TP_ports="1.00" div_cycles="21" fusion_occurred="1" ports="1*p0+1*p23" uops="2" version="2.3"/>
        <measurement TP_loop="10.00" TP_ports="1.00" TP_unrolled="10.00" div_cycles="3" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="22" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="28" max_cycles_addr_index="28" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="16" min_cycles_addr_index="16" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="14.00" TP_no_interiteration="14.00" TP_ports="1.00" div_cycles="14" fusion_occurred="1" latency="26" ports="1*p0+1*p23" uops="2" version="2.1"/>
        <IACA TP="14.00" TP_no_interiteration="14.00" TP_ports="1.00" div_cycles="14" fusion_occurred="1" ports="1*p0+1*p23" uops="2" version="2.2"/>
        <IACA TP="14.00" TP_ports="1.00" div_cycles="14" fusion_occurred="1" ports="1*p0+1*p23" uops="2" version="2.3"/>
        <measurement TP_loop="8.00" TP_ports="1.00" TP_unrolled="8.00" div_cycles="11" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="20" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="26" max_cycles_addr_index="26" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="16" min_cycles_addr_index="16" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="14.00" TP_no_interiteration="14.00" TP_ports="1.00" div_cycles="14" fusion_occurred="1" latency="26" ports="1*p0+1*p23" uops="2" version="2.1"/>
        <IACA TP="14.00" TP_no_interiteration="14.00" TP_ports="1.00" div_cycles="14" fusion_occurred="1" ports="1*p0+1*p23" uops="2" version="2.2"/>
        <IACA TP="14.00" TP_ports="1.00" div_cycles="14" fusion_occurred="1" ports="1*p0+1*p23" uops="2" version="2.3"/>
        <IACA TP="14.00" TP_ports="1.00" div_cycles="14" fusion_occurred="1" ports="1*p0+1*p23" uops="2" version="3.0"/>
        <measurement TP_loop="8.00" TP_ports="1.00" TP_unrolled="8.00" div_cycles="9" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="20" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="26" max_cycles_addr_index="26" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="16" min_cycles_addr_index="16" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="4.00" TP_no_interiteration="4.00" TP_ports="1.00" div_cycles="4" fusion_occurred="1" ports="1*p0+1*p23" uops="2" version="2.2"/>
        <IACA TP="4.00" TP_ports="1.00" div_cycles="4" fusion_occurred="1" ports="1*p0+1*p23" uops="2" version="2.3"/>
        <IACA TP="4.00" TP_ports="1.00" div_cycles="4" fusion_occurred="1" ports="1*p0+1*p23" uops="2" version="3.0"/>
        <measurement TP_loop="4.00" TP_ports="1.00" TP_unrolled="4.00" div_cycles="9" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="14" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="20" max_cycles_addr_index="20" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="16" min_cycles_addr_index="16" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="4.00" TP_ports="1.00" div_cycles="4" fusion_occurred="1" ports="1*p0+1*p23" uops="2" version="2.3"/>
        <IACA TP="4.00" TP_ports="1.00" div_cycles="4" fusion_occurred="1" ports="1*p0+1*p23" uops="2" version="3.0"/>
        <measurement TP_loop="4.00" TP_ports="1.00" TP_unrolled="4.00" div_cycles="7" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="21" max_cycles_addr_index="21" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="20" min_cycles_addr_index="20" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="4.00" TP_ports="1.00" div_cycles="4" fusion_occurred="1" ports="1*p0+1*p23" uops="2" version="2.3"/>
        <IACA TP="4.00" TP_ports="1.00" div_cycles="4" fusion_occurred="1" ports="1*p0+1*p23" uops="2" version="3.0"/>
        <measurement TP_loop="4.00" TP_ports="1.00" TP_unrolled="4.00" div_cycles="7" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="21" max_cycles_addr_index="21" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="20" min_cycles_addr_index="20" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="4.00" TP_ports="1.00" TP_unrolled="4.00" div_cycles="7" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="21" max_cycles_addr_index="21" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="20" min_cycles_addr_index="20" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="4.00" TP_ports="1.00" TP_unrolled="4.00" div_cycles="7" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="21" max_cycles_addr_index="21" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="20" min_cycles_addr_index="20" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="4.00" TP_ports="1.00" TP_unrolled="4.00" div_cycles="7" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="21" max_cycles_addr_index="21" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="20" min_cycles_addr_index="20" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="4.00" TP_ports="1.00" TP_unrolled="4.00" div_cycles="7" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="21" max_cycles_addr_index="21" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="20" min_cycles_addr_index="20" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="4.00" TP_ports="1.00" TP_unrolled="4.00" div_cycles="7" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="21" max_cycles_addr_index="21" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="20" min_cycles_addr_index="20" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="4.00" TP_ports="1.00" TP_unrolled="4.00" div_cycles="7" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="21" max_cycles_addr_index="21" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="20" min_cycles_addr_index="20" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="4.00" TP_ports="1.00" TP_unrolled="4.00" div_cycles="7" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="21" max_cycles_addr_index="21" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="20" min_cycles_addr_index="20" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="4.00" TP_ports="1.00" TP_unrolled="4.00" ports="1*FP3" uops="1">
          <latency max_cycles="13" max_cycles_is_upper_bound="1" min_cycles="8" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="21" max_cycles_addr_index="21" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="16" min_cycles_addr_index="16" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="5.00" TP_ports="1.00" TP_unrolled="5.00" ports="1*FP3" uops="1">
          <latency cycles="13" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="21" cycles_addr_index="21" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="4.50" TP_ports="0.50" TP_unrolled="4.50" ports="1*FP01" uops="1">
          <latency cycles="13" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="21" cycles_addr_index="21" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VDIVSD" category="AVX" cpl="3" extension="AVX" iclass="VDIVSD" iform="VDIVSD_XMMdq_XMMdq_XMMq" isa-set="AVX" mxcsr="1" string="VDIVSD (XMM, XMM, XMM)" summary="Divide Scalar Double-Precision Floating-Point Value" url="uops.info/html-instr/VDIVSD_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/DIVSD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="64" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="21.00" TP_no_interiteration="21.00" TP_ports="1.00" div_cycles="21" latency="21" ports="1*p0" uops="1" version="2.1"/>
        <IACA TP="21.00" TP_no_interiteration="21.00" TP_ports="1.00" div_cycles="21" ports="1*p0" uops="1" version="2.2"/>
        <IACA TP="21.00" TP_ports="1.00" div_cycles="21" ports="1*p0" uops="1" version="2.3"/>
        <measurement TP_loop="10.00" TP_ports="1.00" TP_unrolled="10.00" div_cycles="3" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="22" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="22" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="14.00" TP_no_interiteration="14.00" TP_ports="1.00" div_cycles="14" latency="20" ports="1*p0" uops="1" version="2.1"/>
        <IACA TP="14.00" TP_no_interiteration="14.00" TP_ports="1.00" div_cycles="14" ports="1*p0" uops="1" version="2.2"/>
        <IACA TP="14.00" TP_ports="1.00" div_cycles="14" ports="1*p0" uops="1" version="2.3"/>
        <measurement TP_loop="8.00" TP_ports="1.00" TP_unrolled="8.00" div_cycles="11" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="20" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="20" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="14.00" TP_no_interiteration="14.00" TP_ports="1.00" div_cycles="14" latency="20" ports="1*p0" uops="1" version="2.1"/>
        <IACA TP="14.00" TP_no_interiteration="14.00" TP_ports="1.00" div_cycles="14" ports="1*p0" uops="1" version="2.2"/>
        <IACA TP="14.00" TP_ports="1.00" div_cycles="14" ports="1*p0" uops="1" version="2.3"/>
        <IACA TP="13.81" TP_ports="1.00" div_cycles="14" ports="1*p0" uops="1" version="3.0"/>
        <measurement TP_loop="8.02" TP_ports="1.00" TP_unrolled="8.02" div_cycles="9" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="20" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="20" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="4.00" TP_no_interiteration="4.00" TP_ports="1.00" div_cycles="4" ports="1*p0" uops="1" version="2.2"/>
        <IACA TP="4.00" TP_ports="1.00" div_cycles="4" ports="1*p0" uops="1" version="2.3"/>
        <IACA TP="3.94" TP_ports="1.00" div_cycles="4" ports="1*p0" uops="1" version="3.0"/>
        <measurement TP_loop="4.00" TP_ports="1.00" TP_unrolled="4.00" div_cycles="9" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="14" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="14" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="4.00" TP_ports="1.00" div_cycles="4" ports="1*p0" uops="1" version="2.3"/>
        <IACA TP="3.95" TP_ports="1.00" div_cycles="4" ports="1*p0" uops="1" version="3.0"/>
        <measurement TP_loop="4.00" TP_ports="1.00" TP_unrolled="4.00" div_cycles="7" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="4.00" TP_ports="1.00" div_cycles="4" ports="1*p0" uops="1" version="2.3"/>
        <IACA TP="3.95" TP_ports="1.00" div_cycles="4" ports="1*p0" uops="1" version="3.0"/>
        <measurement TP_loop="4.00" TP_ports="1.00" TP_unrolled="4.00" div_cycles="7" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="4.00" TP_ports="1.00" TP_unrolled="4.00" div_cycles="7" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="4.00" TP_ports="1.00" TP_unrolled="4.00" div_cycles="7" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="4.00" TP_ports="1.00" TP_unrolled="4.00" div_cycles="7" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="4.00" TP_ports="1.00" TP_unrolled="4.00" div_cycles="7" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="4.00" TP_ports="1.00" TP_unrolled="4.00" div_cycles="7" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="4.00" TP_ports="1.00" TP_unrolled="4.00" div_cycles="7" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="4.00" TP_ports="1.00" TP_unrolled="4.00" div_cycles="7" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="15" max_cycles_is_upper_bound="1" min_cycles="13" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="4.00" TP_ports="1.00" TP_unrolled="4.00" ports="1*FP3" uops="1">
          <latency max_cycles="13" max_cycles_is_upper_bound="1" min_cycles="8" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="13" max_cycles_is_upper_bound="1" min_cycles="8" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="4.50" latency="13" ports="FP3" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="5.00" TP_ports="1.00" TP_unrolled="5.00" ports="1*FP3" uops="1">
          <latency cycles="13" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="13" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="6.50" latency="13" ports="FP3" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="4.50" TP_ports="0.50" TP_unrolled="4.50" ports="1*FP01" uops="1">
          <latency cycles="13" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="13" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="6.50" latency="13" ports="FP3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VDIVSS" category="AVX" cpl="3" extension="AVX" iclass="VDIVSS" iform="VDIVSS_XMMdq_XMMdq_MEMd" isa-set="AVX" mxcsr="1" string="VDIVSS (XMM, XMM, M32)" summary="Divide Scalar Single-Precision Floating-Point Values" url="uops.info/html-instr/VDIVSS_XMM_XMM_M32.html" url-ref="felixcloutier.com/x86/DIVSS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="dword ptr" name="MEM0" r="1" type="mem" width="32" xtype="f32"/>
      <architecture name="SNB">
        <IACA TP="14.00" TP_no_interiteration="14.00" TP_ports="1.00" div_cycles="14" fusion_occurred="1" latency="20" ports="1*p0+1*p23" uops="2" version="2.1"/>
        <IACA TP="14.00" TP_no_interiteration="14.00" TP_ports="1.00" div_cycles="14" fusion_occurred="1" ports="1*p0+1*p23" uops="2" version="2.2"/>
        <IACA TP="14.00" TP_ports="1.00" div_cycles="14" fusion_occurred="1" ports="1*p0+1*p23" uops="2" version="2.3"/>
        <measurement TP_loop="10.00" TP_ports="1.00" TP_unrolled="10.00" div_cycles="3" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="14" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="20" max_cycles_addr_index="20" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="16" min_cycles_addr_index="16" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="7.00" TP_no_interiteration="7.00" TP_ports="1.00" div_cycles="7" fusion_occurred="1" latency="19" ports="1*p0+1*p23" uops="2" version="2.1"/>
        <IACA TP="7.00" TP_no_interiteration="7.00" TP_ports="1.00" div_cycles="7" fusion_occurred="1" ports="1*p0+1*p23" uops="2" version="2.2"/>
        <IACA TP="7.00" TP_ports="1.00" div_cycles="7" fusion_occurred="1" ports="1*p0+1*p23" uops="2" version="2.3"/>
        <measurement TP_loop="7.00" TP_ports="1.00" TP_unrolled="7.00" div_cycles="11" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="13" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="19" max_cycles_addr_index="19" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="16" min_cycles_addr_index="16" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="7.00" TP_no_interiteration="7.00" TP_ports="1.00" div_cycles="7" fusion_occurred="1" latency="19" ports="1*p0+1*p23" uops="2" version="2.1"/>
        <IACA TP="7.00" TP_no_interiteration="7.00" TP_ports="1.00" div_cycles="7" fusion_occurred="1" ports="1*p0+1*p23" uops="2" version="2.2"/>
        <IACA TP="7.00" TP_ports="1.00" div_cycles="7" fusion_occurred="1" ports="1*p0+1*p23" uops="2" version="2.3"/>
        <IACA TP="7.00" TP_ports="1.00" div_cycles="7" fusion_occurred="1" ports="1*p0+1*p23" uops="2" version="3.0"/>
        <measurement TP_loop="7.00" TP_ports="1.00" TP_unrolled="7.00" div_cycles="9" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="13" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles_addr="19" max_cycles_addr_index="19" max_cycles_addr_index_is_upper_bound="1" max_cycles_addr_is_upper_bound="1" min_cycles_addr="16" min_cycles_addr_index="16" min_cycles_addr_index_is_upper_bound="1" min_cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="3.00" TP_no_interiteration="3.00" TP_ports="1.00" div_cycles="3" fusion_occurred="1" ports="1*p0+1*p23" uops="2" version="2.2"/>
        <IACA TP="3.00" TP_ports="1.00" div_cycles="3" fusion_occurred="1" ports="1*p0+1*p23" uops="2" version="2.3"/>
        <IACA TP="2.93" TP_ports="1.00" div_cycles="3" fusion_occurred="1" ports="1*p0+1*p23" uops="2" version="3.0"/>
        <measurement TP_loop="2.50" TP_ports="1.00" TP_unrolled="2.50" div_cycles="10" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="17" cycles_addr_index="17" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="3.00" TP_ports="1.00" div_cycles="3" fusion_occurred="1" ports="1*p0+1*p23" uops="2" version="2.3"/>
        <IACA TP="2.96" TP_ports="1.00" div_cycles="3" fusion_occurred="1" ports="1*p0+1*p23" uops="2" version="3.0"/>
        <measurement TP_loop="3.00" TP_ports="1.00" TP_unrolled="3.00" div_cycles="5" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="18" cycles_addr_index="18" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="3.00" TP_ports="1.00" div_cycles="3" fusion_occurred="1" ports="1*p0+1*p23" uops="2" version="2.3"/>
        <IACA TP="2.96" TP_ports="1.00" div_cycles="3" fusion_occurred="1" ports="1*p0+1*p23" uops="2" version="3.0"/>
        <measurement TP_loop="3.00" TP_ports="1.00" TP_unrolled="3.00" div_cycles="5" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="18" cycles_addr_index="18" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="3.00" TP_ports="1.00" TP_unrolled="3.00" div_cycles="5" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="18" cycles_addr_index="18" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="3.00" TP_ports="1.00" TP_unrolled="3.00" div_cycles="5" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="18" cycles_addr_index="18" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="3.00" TP_ports="1.00" TP_unrolled="3.00" div_cycles="5" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="18" cycles_addr_index="18" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="3.00" TP_ports="1.00" TP_unrolled="3.00" div_cycles="5" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="18" cycles_addr_index="18" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="3.00" TP_ports="1.00" TP_unrolled="3.00" div_cycles="5" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="18" cycles_addr_index="18" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="3.0" latency="17.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="3.00" TP_ports="1.00" TP_unrolled="3.00" div_cycles="5" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="18" cycles_addr_index="18" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="3.00" TP_ports="1.00" TP_unrolled="3.00" div_cycles="5" ports="1*p0+1*p23" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="18" cycles_addr_index="18" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="3.00" TP_ports="1.00" TP_unrolled="3.00" ports="1*FP3" uops="1">
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="18" cycles_addr_index="18" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="3.50" TP_ports="1.00" TP_unrolled="3.50" ports="1*FP3" uops="1">
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="18" cycles_addr_index="18" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="3.50" TP_ports="0.50" TP_unrolled="3.50" ports="1*FP01" uops="1">
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles_addr="19" cycles_addr_index="19" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VDIVSS" category="AVX" cpl="3" extension="AVX" iclass="VDIVSS" iform="VDIVSS_XMMdq_XMMdq_XMMd" isa-set="AVX" mxcsr="1" string="VDIVSS (XMM, XMM, XMM)" summary="Divide Scalar Single-Precision Floating-Point Values" url="uops.info/html-instr/VDIVSS_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/DIVSS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="32" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="14.00" TP_no_interiteration="14.00" TP_ports="1.00" div_cycles="14" latency="14" ports="1*p0" uops="1" version="2.1"/>
        <IACA TP="14.00" TP_no_interiteration="14.00" TP_ports="1.00" div_cycles="14" ports="1*p0" uops="1" version="2.2"/>
        <IACA TP="14.00" TP_ports="1.00" div_cycles="14" ports="1*p0" uops="1" version="2.3"/>
        <measurement TP_loop="10.00" TP_ports="1.00" TP_unrolled="10.00" div_cycles="3" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="14" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="14" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="7.00" TP_no_interiteration="7.00" TP_ports="1.00" div_cycles="7" latency="13" ports="1*p0" uops="1" version="2.1"/>
        <IACA TP="7.00" TP_no_interiteration="7.00" TP_ports="1.00" div_cycles="7" ports="1*p0" uops="1" version="2.2"/>
        <IACA TP="7.00" TP_ports="1.00" div_cycles="7" ports="1*p0" uops="1" version="2.3"/>
        <measurement TP_loop="7.08" TP_ports="1.00" TP_unrolled="7.09" div_cycles="11" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="13" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="13" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="7.00" TP_no_interiteration="7.00" TP_ports="1.00" div_cycles="7" latency="13" ports="1*p0" uops="1" version="2.1"/>
        <IACA TP="7.00" TP_no_interiteration="7.00" TP_ports="1.00" div_cycles="7" ports="1*p0" uops="1" version="2.2"/>
        <IACA TP="7.00" TP_ports="1.00" div_cycles="7" ports="1*p0" uops="1" version="2.3"/>
        <IACA TP="6.90" TP_ports="1.00" div_cycles="7" ports="1*p0" uops="1" version="3.0"/>
        <measurement TP_loop="7.03" TP_ports="1.00" TP_unrolled="7.03" div_cycles="9" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="13" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="13" max_cycles_is_upper_bound="1" min_cycles="10" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="3.00" TP_no_interiteration="3.00" TP_ports="1.00" div_cycles="3" ports="1*p0" uops="1" version="2.2"/>
        <IACA TP="3.00" TP_ports="1.00" div_cycles="3" ports="1*p0" uops="1" version="2.3"/>
        <IACA TP="2.95" TP_ports="1.00" div_cycles="3" ports="1*p0" uops="1" version="3.0"/>
        <measurement TP_loop="2.57" TP_ports="1.00" TP_unrolled="2.50" div_cycles="10" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="3.00" TP_ports="1.00" div_cycles="3" ports="1*p0" uops="1" version="2.3"/>
        <IACA TP="2.96" TP_ports="1.00" div_cycles="3" ports="1*p0" uops="1" version="3.0"/>
        <measurement TP_loop="3.00" TP_ports="1.00" TP_unrolled="3.00" div_cycles="5" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="3.00" TP_ports="1.00" div_cycles="3" ports="1*p0" uops="1" version="2.3"/>
        <IACA TP="2.96" TP_ports="1.00" div_cycles="3" ports="1*p0" uops="1" version="3.0"/>
        <measurement TP_loop="3.00" TP_ports="1.00" TP_unrolled="3.00" div_cycles="5" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="3.00" TP_ports="1.00" TP_unrolled="3.00" div_cycles="5" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="3.00" TP_ports="1.00" TP_unrolled="3.00" div_cycles="5" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="3.00" TP_ports="1.00" TP_unrolled="3.00" div_cycles="5" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="3.00" TP_ports="1.00" TP_unrolled="3.00" div_cycles="5" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="3.00" TP_ports="1.00" TP_unrolled="3.00" div_cycles="5" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="3.0" latency="11.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="3.00" TP_ports="1.00" TP_unrolled="3.00" div_cycles="5" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="3.00" TP_ports="1.00" TP_unrolled="3.00" div_cycles="5" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency max_cycles="12" max_cycles_is_upper_bound="1" min_cycles="11" min_cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="3.00" TP_ports="1.00" TP_unrolled="3.00" ports="1*FP3" uops="1">
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="3.00" latency="10" ports="FP3" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="3.50" TP_ports="1.00" TP_unrolled="3.50" ports="1*FP3" uops="1">
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="5.00" latency="10" ports="FP3" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="3.50" TP_ports="0.50" TP_unrolled="3.50" ports="1*FP01" uops="1">
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="5.00" latency="10" ports="FP3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VDPPD" category="AVX" cpl="3" extension="AVX" iclass="VDPPD" iform="VDPPD_XMMdq_XMMdq_MEMdq_IMMb" isa-set="AVX" mxcsr="1" string="VDPPD (XMM, XMM, M128, I8)" summary="Dot Product of Packed Double Precision Floating-Point Values" url="uops.info/html-instr/VDPPD_XMM_XMM_M128_I8.html" url-ref="felixcloutier.com/x86/DPPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f64"/>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="15" ports="1*p0+1*p1+1*p23+1*p5" uops="4" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23+1*p5" ports_indexed="1*p0+1*p1+1*p23+1*p5" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23+1*p5" ports_indexed="1*p0+1*p1+1*p23+1*p5" uops="4" uops_indexed="4" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p1+1*p23+1*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="9" start_op="2" target_op="1"/>
          <latency cycles_addr="15" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="15" ports="1*p0+1*p1+1*p23+1*p5" uops="4" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23+1*p5" ports_indexed="1*p0+1*p1+1*p23+1*p5" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23+1*p5" ports_indexed="1*p0+1*p1+1*p23+1*p5" uops="4" uops_indexed="4" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p1+1*p23+1*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="9" start_op="2" target_op="1"/>
          <latency cycles_addr="15" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="15" ports="1*p0+1*p1+1*p23+1*p5" uops="4" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23+1*p5" ports_indexed="1*p0+1*p1+1*p23+1*p5" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23+1*p5" ports_indexed="1*p0+1*p1+1*p23+1*p5" uops="4" uops_indexed="4" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p1+1*p23" uops="4" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p1+1*p23+1*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="9" start_op="2" target_op="1"/>
          <latency cycles_addr="15" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23+1*p5" ports_indexed="1*p0+1*p1+1*p23+1*p5" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p1+1*p23+1*p5" ports_indexed="1*p0+1*p1+1*p23+1*p5" uops="4" uops_indexed="4" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p1+1*p23" uops="4" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p1+1*p23+1*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="7" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="2*p015+1*p23+1*p5" ports_indexed="2*p015+1*p23+1*p5" uops="4" uops_indexed="4" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="2*p01+1*p23" uops="4" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="1" complex_decoder="1" ports="2*p01+1*p23+1*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="9" start_op="2" target_op="1"/>
          <latency cycles_addr="16" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="2*p01+1*p23+1*p5" ports_indexed="2*p01+1*p23+1*p5" uops="4" uops_indexed="4" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="2*p01+1*p23" uops="4" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="1" complex_decoder="1" ports="2*p01+1*p23+1*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="9" start_op="2" target_op="1"/>
          <latency cycles_addr="16" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="1" complex_decoder="1" ports="2*p01+1*p23+1*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="9" start_op="2" target_op="1"/>
          <latency cycles_addr="16" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="1" complex_decoder="1" ports="2*p01+1*p23+1*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="9" start_op="2" target_op="1"/>
          <latency cycles_addr="16" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="1" complex_decoder="1" ports="2*p01+1*p23+1*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="9" start_op="2" target_op="1"/>
          <latency cycles_addr="16" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="1" complex_decoder="1" ports="2*p01+1*p23+1*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="9" start_op="2" target_op="1"/>
          <latency cycles_addr="16" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.06" TP_ports="1.00" TP_unrolled="1.05" available_simple_decoders="1" complex_decoder="1" ports="2*p01+1*p15+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="9" start_op="2" target_op="1"/>
          <latency cycles_addr="16" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc latency="15.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.06" TP_ports="1.00" TP_unrolled="1.05" available_simple_decoders="1" complex_decoder="1" ports="2*p01+1*p15+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="9" start_op="2" target_op="1"/>
          <latency cycles_addr="16" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.06" TP_ports="1.00" TP_unrolled="1.06" available_simple_decoders="1" complex_decoder="1" ports="2*p01+1*p15+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="9" start_op="2" target_op="1"/>
          <latency cycles_addr="16" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="4.00" TP_unrolled="4.00" uops="5">
          <latency cycles="10" start_op="2" target_op="1"/>
          <latency cycles_addr="18" cycles_addr_index="18" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="17" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="4.00" TP_unrolled="4.00" uops="5">
          <latency cycles="9" start_op="2" target_op="1"/>
          <latency cycles_addr="17" cycles_addr_index="17" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="16" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="4.00" TP_ports="1.00" TP_unrolled="4.00" ports="1*FP0123+1*FP1+1*FP23" uops="5">
          <latency cycles="9" start_op="2" target_op="1"/>
          <latency cycles_addr="17" cycles_addr_index="17" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="17" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VDPPD" category="AVX" cpl="3" extension="AVX" iclass="VDPPD" iform="VDPPD_XMMdq_XMMdq_XMMdq_IMMb" isa-set="AVX" mxcsr="1" string="VDPPD (XMM, XMM, XMM, I8)" summary="Dot Product of Packed Double Precision Floating-Point Values" url="uops.info/html-instr/VDPPD_XMM_XMM_XMM_I8.html" url-ref="felixcloutier.com/x86/DPPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="9" ports="1*p0+1*p1+1*p5" uops="3" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p1+1*p5" uops="3" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p1+1*p5" uops="3" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p1+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="9" start_op="2" target_op="1"/>
          <latency cycles="9" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="9" ports="1*p0+1*p1+1*p5" uops="3" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p1+1*p5" uops="3" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p1+1*p5" uops="3" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p1+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="9" start_op="2" target_op="1"/>
          <latency cycles="9" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="9" ports="1*p0+1*p1+1*p5" uops="3" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p1+1*p5" uops="3" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p1+1*p5" uops="3" version="2.3"/>
        <IACA TP="0.96" TP_ports="1.00" ports="1*p0+1*p1" uops="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p1+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="9" start_op="2" target_op="1"/>
          <latency cycles="9" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p1+1*p5" uops="3" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p1+1*p5" uops="3" version="2.3"/>
        <IACA TP="0.96" TP_ports="1.00" ports="1*p0+1*p1" uops="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p1+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="7" start_op="2" target_op="1"/>
          <latency cycles="7" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="2*p015+1*p5" uops="3" version="2.3"/>
        <IACA TP="0.96" TP_ports="1.00" ports="2*p01" uops="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p01+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="9" start_op="2" target_op="1"/>
          <latency cycles="9" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="2*p01+1*p5" uops="3" version="2.3"/>
        <IACA TP="0.96" TP_ports="1.00" ports="2*p01" uops="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p01+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="9" start_op="2" target_op="1"/>
          <latency cycles="9" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p01+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="9" start_op="2" target_op="1"/>
          <latency cycles="9" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p01+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="9" start_op="2" target_op="1"/>
          <latency cycles="9" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p01+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="9" start_op="2" target_op="1"/>
          <latency cycles="9" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p01+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="9" start_op="2" target_op="1"/>
          <latency cycles="9" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.06" TP_ports="1.00" TP_unrolled="1.05" available_simple_decoders="2" complex_decoder="1" ports="2*p01+1*p15" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="9" start_op="2" target_op="1"/>
          <latency cycles="9" start_op="3" target_op="1"/>
        </measurement>
        <doc latency="9.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.06" TP_ports="1.00" TP_unrolled="1.05" available_simple_decoders="2" complex_decoder="1" ports="2*p01+1*p15" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="9" start_op="2" target_op="1"/>
          <latency cycles="9" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.06" TP_ports="1.00" TP_unrolled="1.05" available_simple_decoders="2" complex_decoder="1" ports="2*p01+1*p15" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="9" start_op="2" target_op="1"/>
          <latency cycles="9" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="3.00" TP_unrolled="3.00" uops="3">
          <latency cycles="10" start_op="2" target_op="1"/>
          <latency cycles="10" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="3.00" latency="10" uops="ucode"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="3.00" TP_unrolled="3.00" uops="3">
          <latency cycles="9" start_op="2" target_op="1"/>
          <latency cycles="9" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="ucode"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="3.00" TP_ports="1.00" TP_unrolled="3.00" ports="1*FP0123+1*FP1+1*FP23" uops="3">
          <latency cycles="9" start_op="2" target_op="1"/>
          <latency cycles="9" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="ucode"/>
      </architecture>
    </instruction>
    <instruction asm="VDPPS" category="AVX" cpl="3" extension="AVX" iclass="VDPPS" iform="VDPPS_XMMdq_XMMdq_MEMdq_IMMb" isa-set="AVX" mxcsr="1" string="VDPPS (XMM, XMM, M128, I8)" summary="Dot Product of Packed Single Precision Floating-Point Values" url="uops.info/html-instr/VDPPS_XMM_XMM_M128_I8.html" url-ref="felixcloutier.com/x86/DPPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f32"/>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <measurement TP_loop="4.00" TP_ports="2.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+2*p1+1*p23+2*p5" uops="6" uops_MITE="4" uops_MS="2" uops_retire_slots="6">
          <latency cycles="12" start_op="2" target_op="1"/>
          <latency cycles_addr="18" cycles_addr_index="18" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="17" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <measurement TP_loop="4.00" TP_ports="2.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+2*p1+1*p23+2*p5" uops="6" uops_MITE="4" uops_MS="2" uops_retire_slots="6">
          <latency cycles="12" start_op="2" target_op="1"/>
          <latency cycles_addr="18" cycles_addr_index="18" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="17" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="2*p0+1*p1+1*p23+1*p5" ports_indexed="2*p0+1*p1+1*p23+1*p5" uops="5" uops_indexed="5" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="2*p0+1*p1+1*p23+1*p5" ports_indexed="2*p0+1*p1+1*p23+1*p5" uops="5" uops_indexed="5" version="2.3"/>
        <IACA TP="2.00" TP_ports="2.00" ports="2*p0+1*p1+1*p23" uops="5" version="3.0"/>
        <measurement TP_loop="4.00" TP_ports="2.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="2*p0+1*p06+1*p1+1*p23+1*p5" uops="6" uops_MITE="4" uops_MS="2" uops_retire_slots="6">
          <latency cycles="14" start_op="2" target_op="1"/>
          <latency cycles_addr="20" cycles_addr_index="20" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="19" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="2*p0+1*p1+1*p23+1*p5" ports_indexed="2*p0+1*p1+1*p23+1*p5" uops="5" uops_indexed="5" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="2*p0+1*p1+1*p23+1*p5" ports_indexed="2*p0+1*p1+1*p23+1*p5" uops="5" uops_indexed="5" version="2.3"/>
        <IACA TP="2.00" TP_ports="2.00" ports="2*p0+1*p1+1*p23" uops="5" version="3.0"/>
        <measurement TP_loop="4.00" TP_ports="2.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="2*p0+1*p06+1*p1+1*p23+1*p5" uops="6" uops_MITE="4" uops_MS="2" uops_retire_slots="6">
          <latency cycles="12" start_op="2" target_op="1"/>
          <latency cycles_addr="18" cycles_addr_index="18" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="17" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.33" TP_indexed="1.33" TP_ports="1.33" TP_ports_indexed="1.33" fusion_occurred="1" ports="3*p015+1*p23+1*p5" ports_indexed="3*p015+1*p23+1*p5" uops="5" uops_indexed="5" version="2.3"/>
        <IACA TP="1.50" TP_ports="1.50" ports="3*p01+1*p23" uops="5" version="3.0"/>
        <measurement TP_loop="4.00" TP_ports="1.50" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p01+1*p06+1*p23+1*p5" uops="6" uops_MITE="4" uops_MS="2" uops_retire_slots="6">
          <latency cycles="13" start_op="2" target_op="1"/>
          <latency cycles_addr="20" cycles_addr_index="20" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="17" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.50" TP_indexed="1.50" TP_ports="1.50" TP_ports_indexed="1.50" fusion_occurred="1" ports="3*p01+1*p23+1*p5" ports_indexed="3*p01+1*p23+1*p5" uops="5" uops_indexed="5" version="2.3"/>
        <IACA TP="1.50" TP_ports="1.50" ports="3*p01+1*p23" uops="5" version="3.0"/>
        <measurement TP_loop="4.00" TP_ports="1.50" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p01+1*p06+1*p23+1*p5" uops="6" uops_MITE="4" uops_MS="2" uops_retire_slots="6">
          <latency cycles="13" start_op="2" target_op="1"/>
          <latency cycles_addr="20" cycles_addr_index="20" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="17" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="4.00" TP_ports="1.50" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p01+1*p06+1*p23+1*p5" uops="6" uops_MITE="4" uops_MS="2" uops_retire_slots="6">
          <latency cycles="13" start_op="2" target_op="1"/>
          <latency cycles_addr="20" cycles_addr_index="20" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="17" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="4.00" TP_ports="1.50" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p01+1*p06+1*p23+1*p5" uops="6" uops_MITE="4" uops_MS="2" uops_retire_slots="6">
          <latency cycles="13" start_op="2" target_op="1"/>
          <latency cycles_addr="20" cycles_addr_index="20" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="17" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="4.00" TP_ports="1.50" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p01+1*p06+1*p23+1*p5" uops="6" uops_MITE="4" uops_MS="2" uops_retire_slots="6">
          <latency cycles="13" start_op="2" target_op="1"/>
          <latency cycles_addr="20" cycles_addr_index="20" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="17" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="4.00" TP_ports="1.50" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p01+1*p06+1*p23+1*p5" uops="6" uops_MITE="4" uops_MS="2" uops_retire_slots="6">
          <latency cycles="13" start_op="2" target_op="1"/>
          <latency cycles_addr="20" cycles_addr_index="20" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="17" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="5.00" TP_ports="1.67" TP_unrolled="5.00" available_simple_decoders="0" complex_decoder="1" ports="3*p01+1*p06+1*p15+1*p23+1*p5" uops="7" uops_MITE="4" uops_MS="3" uops_retire_slots="7">
          <latency cycles="14" start_op="2" target_op="1"/>
          <latency cycles_addr="21" cycles_addr_index="21" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="18" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="4.00" TP_ports="1.67" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p01+1*p06+1*p15+1*p23+1*p5" uops="7" uops_MITE="4" uops_MS="3" uops_retire_slots="7">
          <latency cycles="14" start_op="2" target_op="1"/>
          <latency cycles_addr="21" cycles_addr_index="21" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="18" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="4.00" TP_ports="1.67" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p01+1*p06+1*p15+1*p23+1*p5" uops="7" uops_MITE="4" uops_MS="3" uops_retire_slots="7">
          <latency cycles="14" start_op="2" target_op="1"/>
          <latency cycles_addr="21" cycles_addr_index="21" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="18" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="5.00" TP_unrolled="5.00" uops="10">
          <latency cycles="15" start_op="2" target_op="1"/>
          <latency cycles_addr="23" cycles_addr_index="23" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="22" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="5.00" TP_unrolled="5.00" uops="10">
          <latency cycles="15" start_op="2" target_op="1"/>
          <latency cycles_addr="23" cycles_addr_index="23" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="22" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="5.00" TP_ports="2.00" TP_unrolled="5.00" ports="1*FP0123+2*FP1+2*FP23" uops="10">
          <latency cycles="15" start_op="2" target_op="1"/>
          <latency cycles_addr="23" cycles_addr_index="23" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="23" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VDPPS" category="AVX" cpl="3" extension="AVX" iclass="VDPPS" iform="VDPPS_XMMdq_XMMdq_XMMdq_IMMb" isa-set="AVX" mxcsr="1" string="VDPPS (XMM, XMM, XMM, I8)" summary="Dot Product of Packed Single Precision Floating-Point Values" url="uops.info/html-instr/VDPPS_XMM_XMM_XMM_I8.html" url-ref="felixcloutier.com/x86/DPPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="12" ports="1*p0+2*p1+1*p5" uops="4" version="2.1"/>
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="1*p0+2*p1+1*p5" uops="4" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="1*p0+2*p1+1*p5" uops="4" version="2.3"/>
        <measurement TP_loop="2.03" TP_ports="2.00" TP_unrolled="2.03" available_simple_decoders="0" complex_decoder="1" ports="1*p0+2*p1+1*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="12" start_op="2" target_op="1"/>
          <latency cycles="12" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="12" ports="1*p0+2*p1+1*p5" uops="4" version="2.1"/>
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="1*p0+2*p1+1*p5" uops="4" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="1*p0+2*p1+1*p5" uops="4" version="2.3"/>
        <measurement TP_loop="2.03" TP_ports="2.00" TP_unrolled="2.02" available_simple_decoders="0" complex_decoder="1" ports="1*p0+2*p1+1*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="12" start_op="2" target_op="1"/>
          <latency cycles="12" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="14" ports="2*p0+1*p1+1*p5" uops="4" version="2.1"/>
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="2*p0+1*p1+1*p5" uops="4" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="2*p0+1*p1+1*p5" uops="4" version="2.3"/>
        <IACA TP="1.93" TP_ports="2.00" ports="2*p0+1*p1" uops="4" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="2*p0+1*p1+1*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="14" start_op="2" target_op="1"/>
          <latency cycles="14" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="2*p0+1*p1+1*p5" uops="4" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="2*p0+1*p1+1*p5" uops="4" version="2.3"/>
        <IACA TP="1.93" TP_ports="2.00" ports="2*p0+1*p1" uops="4" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="2*p0+1*p1+1*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="12" start_op="2" target_op="1"/>
          <latency cycles="12" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.34" TP_ports="1.33" ports="3*p015+1*p5" uops="4" version="2.3"/>
        <IACA TP="1.42" TP_ports="1.50" ports="3*p01" uops="4" version="3.0"/>
        <measurement TP_loop="1.50" TP_ports="1.50" TP_unrolled="1.50" available_simple_decoders="1" complex_decoder="1" ports="3*p01+1*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="13" start_op="2" target_op="1"/>
          <latency cycles="13" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.50" TP_ports="1.50" ports="3*p01+1*p5" uops="4" version="2.3"/>
        <IACA TP="1.42" TP_ports="1.50" ports="3*p01" uops="4" version="3.0"/>
        <measurement TP_loop="1.54" TP_ports="1.50" TP_unrolled="1.57" available_simple_decoders="1" complex_decoder="1" ports="3*p01+1*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="13" start_op="2" target_op="1"/>
          <latency cycles="13" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.50" TP_ports="1.50" TP_unrolled="1.50" available_simple_decoders="1" complex_decoder="1" ports="3*p01+1*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="13" start_op="2" target_op="1"/>
          <latency cycles="13" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.50" TP_ports="1.50" TP_unrolled="1.50" available_simple_decoders="1" complex_decoder="1" ports="3*p01+1*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="13" start_op="2" target_op="1"/>
          <latency cycles="13" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.50" TP_ports="1.50" TP_unrolled="1.50" available_simple_decoders="1" complex_decoder="1" ports="3*p01+1*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="13" start_op="2" target_op="1"/>
          <latency cycles="13" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.54" TP_ports="1.50" TP_unrolled="1.57" available_simple_decoders="1" complex_decoder="1" ports="3*p01+1*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="13" start_op="2" target_op="1"/>
          <latency cycles="13" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="5.00" TP_ports="1.67" TP_unrolled="5.00" available_simple_decoders="0" complex_decoder="1" ports="3*p01+1*p06+1*p15+1*p5" uops="6" uops_MITE="3" uops_MS="3" uops_retire_slots="6">
          <latency cycles="14" start_op="2" target_op="1"/>
          <latency cycles="14" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="4.00" TP_ports="1.67" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p01+1*p06+1*p15+1*p5" uops="6" uops_MITE="3" uops_MS="3" uops_retire_slots="6">
          <latency cycles="14" start_op="2" target_op="1"/>
          <latency cycles="14" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="4.00" TP_ports="1.67" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p01+1*p06+1*p15+1*p5" uops="6" uops_MITE="3" uops_MS="3" uops_retire_slots="6">
          <latency cycles="14" start_op="2" target_op="1"/>
          <latency cycles="14" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="4.00" TP_unrolled="4.00" uops="8">
          <latency cycles="15" start_op="2" target_op="1"/>
          <latency cycles="15" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="4.00" latency="15" uops="ucode"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="4.00" TP_unrolled="4.00" uops="8">
          <latency cycles="15" start_op="2" target_op="1"/>
          <latency cycles="15" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="ucode"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="4.00" TP_ports="2.00" TP_unrolled="4.00" ports="1*FP0123+2*FP1+2*FP23" uops="8">
          <latency cycles="15" start_op="2" target_op="1"/>
          <latency cycles="15" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="ucode"/>
      </architecture>
    </instruction>
    <instruction asm="VDPPS" category="AVX" cpl="3" extension="AVX" iclass="VDPPS" iform="VDPPS_YMMqq_YMMqq_MEMqq_IMMb" isa-set="AVX" mxcsr="1" string="VDPPS (YMM, YMM, M256, I8)" summary="Dot Product of Packed Single Precision Floating-Point Values" url="uops.info/html-instr/VDPPS_YMM_YMM_M256_I8.html" url-ref="felixcloutier.com/x86/DPPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f32"/>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <measurement TP_loop="4.00" TP_ports="2.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+2*p1+1*p23+2*p5" uops="6" uops_MITE="4" uops_MS="2" uops_retire_slots="6">
          <latency cycles="12" start_op="2" target_op="1"/>
          <latency cycles_addr="20" cycles_addr_index="20" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="18" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <measurement TP_loop="4.00" TP_ports="2.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+2*p1+1*p23+2*p5" uops="6" uops_MITE="4" uops_MS="2" uops_retire_slots="6">
          <latency cycles="12" start_op="2" target_op="1"/>
          <latency cycles_addr="20" cycles_addr_index="20" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="18" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="2*p0+1*p1+1*p23+1*p5" ports_indexed="2*p0+1*p1+1*p23+1*p5" uops="5" uops_indexed="5" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="2*p0+1*p1+1*p23+1*p5" ports_indexed="2*p0+1*p1+1*p23+1*p5" uops="5" uops_indexed="5" version="2.3"/>
        <IACA TP="2.00" TP_ports="2.00" ports="2*p0+1*p1+1*p23" uops="5" version="3.0"/>
        <measurement TP_loop="4.00" TP_ports="2.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="2*p0+1*p06+1*p1+1*p23+1*p5" uops="6" uops_MITE="4" uops_MS="2" uops_retire_slots="6">
          <latency cycles="14" start_op="2" target_op="1"/>
          <latency cycles_addr="21" cycles_addr_index="21" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="20" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="2*p0+1*p1+1*p23+1*p5" ports_indexed="2*p0+1*p1+1*p23+1*p5" uops="5" uops_indexed="5" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="2*p0+1*p1+1*p23+1*p5" ports_indexed="2*p0+1*p1+1*p23+1*p5" uops="5" uops_indexed="5" version="2.3"/>
        <IACA TP="2.00" TP_ports="2.00" ports="2*p0+1*p1+1*p23" uops="5" version="3.0"/>
        <measurement TP_loop="4.00" TP_ports="2.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="2*p0+1*p06+1*p1+1*p23+1*p5" uops="6" uops_MITE="4" uops_MS="2" uops_retire_slots="6">
          <latency cycles="12" start_op="2" target_op="1"/>
          <latency cycles_addr="19" cycles_addr_index="19" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="18" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.33" TP_indexed="1.33" TP_ports="1.33" TP_ports_indexed="1.33" fusion_occurred="1" ports="3*p015+1*p23+1*p5" ports_indexed="3*p015+1*p23+1*p5" uops="5" uops_indexed="5" version="2.3"/>
        <IACA TP="1.50" TP_ports="1.50" ports="3*p01+1*p23" uops="5" version="3.0"/>
        <measurement TP_loop="4.00" TP_ports="1.50" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p01+1*p06+1*p23+1*p5" uops="6" uops_MITE="4" uops_MS="2" uops_retire_slots="6">
          <latency cycles="13" start_op="2" target_op="1"/>
          <latency cycles_addr="21" cycles_addr_index="21" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="18" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.50" TP_indexed="1.50" TP_ports="1.50" TP_ports_indexed="1.50" fusion_occurred="1" ports="3*p01+1*p23+1*p5" ports_indexed="3*p01+1*p23+1*p5" uops="5" uops_indexed="5" version="2.3"/>
        <IACA TP="1.50" TP_ports="1.50" ports="3*p01+1*p23" uops="5" version="3.0"/>
        <measurement TP_loop="4.00" TP_ports="1.50" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p01+1*p06+1*p23+1*p5" uops="6" uops_MITE="4" uops_MS="2" uops_retire_slots="6">
          <latency cycles="13" start_op="2" target_op="1"/>
          <latency cycles_addr="21" cycles_addr_index="21" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="18" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="4.00" TP_ports="1.50" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p01+1*p06+1*p23+1*p5" uops="6" uops_MITE="4" uops_MS="2" uops_retire_slots="6">
          <latency cycles="13" start_op="2" target_op="1"/>
          <latency cycles_addr="21" cycles_addr_index="21" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="18" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="4.00" TP_ports="1.50" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p01+1*p06+1*p23+1*p5" uops="6" uops_MITE="4" uops_MS="2" uops_retire_slots="6">
          <latency cycles="13" start_op="2" target_op="1"/>
          <latency cycles_addr="21" cycles_addr_index="21" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="18" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="4.00" TP_ports="1.50" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p01+1*p06+1*p23+1*p5" uops="6" uops_MITE="4" uops_MS="2" uops_retire_slots="6">
          <latency cycles="13" start_op="2" target_op="1"/>
          <latency cycles_addr="21" cycles_addr_index="21" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="18" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="4.00" TP_ports="1.50" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p01+1*p06+1*p23+1*p5" uops="6" uops_MITE="4" uops_MS="2" uops_retire_slots="6">
          <latency cycles="13" start_op="2" target_op="1"/>
          <latency cycles_addr="21" cycles_addr_index="21" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="18" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="4.00" TP_ports="1.67" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p01+1*p06+1*p15+1*p23+1*p5" uops="7" uops_MITE="4" uops_MS="3" uops_retire_slots="7">
          <latency cycles="14" start_op="2" target_op="1"/>
          <latency cycles_addr="22" cycles_addr_index="22" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="19" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="4.00" TP_ports="1.67" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p01+1*p06+1*p15+1*p23+1*p5" uops="7" uops_MITE="4" uops_MS="3" uops_retire_slots="7">
          <latency cycles="14" start_op="2" target_op="1"/>
          <latency cycles_addr="22" cycles_addr_index="22" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="19" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="4.00" TP_ports="1.67" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p01+1*p06+1*p15+1*p23+1*p5" uops="7" uops_MITE="4" uops_MS="3" uops_retire_slots="7">
          <latency cycles="14" start_op="2" target_op="1"/>
          <latency cycles_addr="22" cycles_addr_index="22" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="19" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="5.00" TP_unrolled="5.00" uops="14">
          <latency cycles="15" start_op="2" target_op="1"/>
          <latency cycles_addr="24" cycles_addr_index="24" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="23" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="4.00" TP_unrolled="4.00" uops="8">
          <latency cycles="15" start_op="2" target_op="1"/>
          <latency cycles_addr="23" cycles_addr_index="23" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="23" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="4.00" TP_ports="1.33" TP_unrolled="4.00" ports="1*FP0123+1*FP1+1*FP12+1*FP23+1*FP3" uops="8">
          <latency cycles="15" start_op="2" target_op="1"/>
          <latency cycles_addr="23" cycles_addr_index="23" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="24" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VDPPS" category="AVX" cpl="3" extension="AVX" iclass="VDPPS" iform="VDPPS_YMMqq_YMMqq_YMMqq_IMMb" isa-set="AVX" mxcsr="1" string="VDPPS (YMM, YMM, YMM, I8)" summary="Dot Product of Packed Single Precision Floating-Point Values" url="uops.info/html-instr/VDPPS_YMM_YMM_YMM_I8.html" url-ref="felixcloutier.com/x86/DPPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="12" ports="1*p0+2*p1+1*p5" uops="4" version="2.1"/>
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="1*p0+2*p1+1*p5" uops="4" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="1*p0+2*p1+1*p5" uops="4" version="2.3"/>
        <measurement TP_loop="2.03" TP_ports="2.00" TP_unrolled="2.03" available_simple_decoders="0" complex_decoder="1" ports="1*p0+2*p1+1*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="12" start_op="2" target_op="1"/>
          <latency cycles="12" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="12" ports="1*p0+2*p1+1*p5" uops="4" version="2.1"/>
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="1*p0+2*p1+1*p5" uops="4" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="1*p0+2*p1+1*p5" uops="4" version="2.3"/>
        <measurement TP_loop="2.03" TP_ports="2.00" TP_unrolled="2.02" available_simple_decoders="0" complex_decoder="1" ports="1*p0+2*p1+1*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="12" start_op="2" target_op="1"/>
          <latency cycles="12" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="14" ports="2*p0+1*p1+1*p5" uops="4" version="2.1"/>
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="2*p0+1*p1+1*p5" uops="4" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="2*p0+1*p1+1*p5" uops="4" version="2.3"/>
        <IACA TP="1.93" TP_ports="2.00" ports="2*p0+1*p1" uops="4" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="2*p0+1*p1+1*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="14" start_op="2" target_op="1"/>
          <latency cycles="14" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="2*p0+1*p1+1*p5" uops="4" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="2*p0+1*p1+1*p5" uops="4" version="2.3"/>
        <IACA TP="1.93" TP_ports="2.00" ports="2*p0+1*p1" uops="4" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="2*p0+1*p1+1*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="12" start_op="2" target_op="1"/>
          <latency cycles="12" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.34" TP_ports="1.33" ports="3*p015+1*p5" uops="4" version="2.3"/>
        <IACA TP="1.42" TP_ports="1.50" ports="3*p01" uops="4" version="3.0"/>
        <measurement TP_loop="1.50" TP_ports="1.50" TP_unrolled="1.50" available_simple_decoders="1" complex_decoder="1" ports="3*p01+1*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="13" start_op="2" target_op="1"/>
          <latency cycles="13" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.50" TP_ports="1.50" ports="3*p01+1*p5" uops="4" version="2.3"/>
        <IACA TP="1.42" TP_ports="1.50" ports="3*p01" uops="4" version="3.0"/>
        <measurement TP_loop="1.55" TP_ports="1.50" TP_unrolled="1.57" available_simple_decoders="1" complex_decoder="1" ports="3*p01+1*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="13" start_op="2" target_op="1"/>
          <latency cycles="13" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.50" TP_ports="1.50" TP_unrolled="1.50" available_simple_decoders="1" complex_decoder="1" ports="3*p01+1*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="13" start_op="2" target_op="1"/>
          <latency cycles="13" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.50" TP_ports="1.50" TP_unrolled="1.50" available_simple_decoders="1" complex_decoder="1" ports="3*p01+1*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="13" start_op="2" target_op="1"/>
          <latency cycles="13" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.50" TP_ports="1.50" TP_unrolled="1.50" available_simple_decoders="1" complex_decoder="1" ports="3*p01+1*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="13" start_op="2" target_op="1"/>
          <latency cycles="13" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.54" TP_ports="1.50" TP_unrolled="1.57" available_simple_decoders="1" complex_decoder="1" ports="3*p01+1*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="13" start_op="2" target_op="1"/>
          <latency cycles="13" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="4.00" TP_ports="1.67" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p01+1*p06+1*p15+1*p5" uops="6" uops_MITE="3" uops_MS="3" uops_retire_slots="6">
          <latency cycles="14" start_op="2" target_op="1"/>
          <latency cycles="14" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="4.00" TP_ports="1.67" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p01+1*p06+1*p15+1*p5" uops="6" uops_MITE="3" uops_MS="3" uops_retire_slots="6">
          <latency cycles="14" start_op="2" target_op="1"/>
          <latency cycles="14" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="4.00" TP_ports="1.67" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p01+1*p06+1*p15+1*p5" uops="6" uops_MITE="3" uops_MS="3" uops_retire_slots="6">
          <latency cycles="14" start_op="2" target_op="1"/>
          <latency cycles="14" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="5.00" TP_unrolled="5.00" uops="13">
          <latency cycles="15" start_op="2" target_op="1"/>
          <latency cycles="15" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="5.00" latency="16" uops="ucode"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="4.00" TP_unrolled="4.00" uops="7">
          <latency cycles="15" start_op="2" target_op="1"/>
          <latency cycles="15" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="ucode"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="4.00" TP_ports="1.33" TP_unrolled="4.00" ports="1*FP0123+1*FP1+1*FP12+1*FP23+1*FP3" uops="7">
          <latency cycles="15" start_op="2" target_op="1"/>
          <latency cycles="15" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="ucode"/>
      </architecture>
    </instruction>
    <instruction asm="VEXTRACTF128" category="AVX" cpl="3" extension="AVX" iclass="VEXTRACTF128" iform="VEXTRACTF128_MEMdq_YMMdq_IMMb" isa-set="AVX" string="VEXTRACTF128 (M128, YMM, I8)" summary="Extract Packed Floating-Point Values" url="uops.info/html-instr/VEXTRACTF128_M128_YMM_I8.html" url-ref="felixcloutier.com/x86/VEXTRACTF128:VEXTRACTF32x4:VEXTRACTF64x2:VEXTRACTF32x8:VEXTRACTF64x4.html" vex="1">
      <operand idx="1" memory-prefix="xmmword ptr" name="MEM0" type="mem" w="1" width="128" xtype="f64"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="128" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p4" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p4" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.91" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p4" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.90" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p4" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.92" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p4" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.92" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p4" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p4" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p4" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p4" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p4" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p49+1*p78" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p49+1*p78" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p49+1*p78" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="9" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP01" uops="2">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VEXTRACTF128" category="AVX" cpl="3" extension="AVX" iclass="VEXTRACTF128" iform="VEXTRACTF128_XMMdq_YMMdq_IMMb" isa-set="AVX" string="VEXTRACTF128 (XMM, YMM, I8)" summary="Extract Packed Floating-Point Values" url="uops.info/html-instr/VEXTRACTF128_XMM_YMM_I8.html" url-ref="felixcloutier.com/x86/VEXTRACTF128:VEXTRACTF32x4:VEXTRACTF64x2:VEXTRACTF32x8:VEXTRACTF64x4.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="2" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="2" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.0" latency="3.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.33" TP_unrolled="0.38" ports="1*FP013" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.33" latency="1" ports="FP0/1/3" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.33" latency="1" ports="FP0/1/3" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP01" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="4" ports="FP0" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VEXTRACTPS" category="AVX" cpl="3" extension="AVX" iclass="VEXTRACTPS" iform="VEXTRACTPS_MEMd_XMMdq_IMMb" isa-set="AVX" string="VEXTRACTPS (M32, XMM, I8)" summary="Extract Packed Floating-Point Values" url="uops.info/html-instr/VEXTRACTPS_M32_XMM_I8.html" url-ref="felixcloutier.com/x86/EXTRACTPS.html" vex="1">
      <operand idx="1" memory-prefix="dword ptr" name="MEM0" type="mem" w="1" width="32" xtype="f32"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p23+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_indexed="3" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p23+1*p4+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="6" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p23+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_indexed="3" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p23+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="6" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="0.88" TP_indexed="0.80" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="6" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="0.88" TP_indexed="0.80" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="6" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="0.85" TP_indexed="0.76" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="2">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="0.85" TP_indexed="0.76" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="2">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="2">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="2">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="2">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="2">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p49+1*p5+1*p78" uops="3" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p49+1*p5+1*p78" uops="3" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p49+1*p5+1*p78" uops="3" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="8" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="8" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP12" uops="2">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="9" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VEXTRACTPS" category="AVX" cpl="3" extension="AVX" iclass="VEXTRACTPS" iform="VEXTRACTPS_GPR32_XMMdq_IMMb" isa-set="AVX" string="VEXTRACTPS (R32, XMM, I8)" summary="Extract Packed Floating-Point Values" url="uops.info/html-instr/VEXTRACTPS_R32_XMM_I8.html" url-ref="felixcloutier.com/x86/EXTRACTPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="32">EAX,ECX,EDX,EBX,ESP,EBP,ESI,EDI,R8D,R9D,R10D,R11D,R12D,R13D,R14D,R15D</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p0+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p0+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p0+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.96" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.96" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.96" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.96" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="6" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="6" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP12" uops="2">
          <latency cycles="6" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VHADDPD" category="AVX" cpl="3" extension="AVX" iclass="VHADDPD" iform="VHADDPD_XMMdq_XMMdq_MEMdq" isa-set="AVX" mxcsr="1" string="VHADDPD (XMM, XMM, M128)" summary="Packed Double-FP Horizontal Add" url="uops.info/html-instr/VHADDPD_XMM_XMM_M128.html" url-ref="felixcloutier.com/x86/HADDPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f64"/>
      <architecture name="SNB">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" latency="11" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.1"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.3"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" latency="11" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.1"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.3"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" latency="11" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.1"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="4" uops_indexed="4" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="4" uops_indexed="4" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p01+1*p23+2*p5" ports_indexed="1*p01+1*p23+2*p5" uops="4" uops_indexed="4" version="2.3"/>
        <IACA TP="0.75" TP_indexed="1.00" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="4" uops_indexed="4" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p01+1*p23+2*p5" ports_indexed="1*p01+1*p23+2*p5" uops="4" uops_indexed="4" version="2.3"/>
        <IACA TP="0.75" TP_indexed="1.00" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="4" uops_indexed="4" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="2.0" latency="12.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="2.00" TP_ports="1.00" TP_unrolled="2.00" ports="1*FP1+1*FP12+1*FP3" uops="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VHADDPD" category="AVX" cpl="3" extension="AVX" iclass="VHADDPD" iform="VHADDPD_XMMdq_XMMdq_XMMdq" isa-set="AVX" mxcsr="1" string="VHADDPD (XMM, XMM, XMM)" summary="Packed Double-FP Horizontal Add" url="uops.info/html-instr/VHADDPD_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/HADDPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="5" ports="1*p1+2*p5" uops="3" version="2.1"/>
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.3"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="5" ports="1*p1+2*p5" uops="3" version="2.1"/>
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.3"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="5" ports="1*p1+2*p5" uops="3" version="2.1"/>
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.3"/>
        <IACA TP="0.95" TP_ports="1.00" ports="1*p1" uops="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.3"/>
        <IACA TP="0.95" TP_ports="1.00" ports="1*p1" uops="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="2.00" TP_ports="2.00" ports="1*p01+2*p5" uops="3" version="2.3"/>
        <IACA TP="0.71" TP_ports="0.50" ports="1*p01" uops="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="2.00" TP_ports="2.00" ports="1*p01+2*p5" uops="3" version="2.3"/>
        <IACA TP="0.71" TP_ports="0.50" ports="1*p01" uops="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="2.0" latency="6.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="2.00" latency="6" uops="ucode"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="4" uops="ucode"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="2.00" TP_ports="1.00" TP_unrolled="2.00" ports="1*FP1+1*FP12+1*FP23" uops="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="4" uops="ucode"/>
      </architecture>
    </instruction>
    <instruction asm="VHADDPD" category="AVX" cpl="3" extension="AVX" iclass="VHADDPD" iform="VHADDPD_YMMqq_YMMqq_MEMqq" isa-set="AVX" mxcsr="1" string="VHADDPD (YMM, YMM, M256)" summary="Packed Double-FP Horizontal Add" url="uops.info/html-instr/VHADDPD_YMM_YMM_M256.html" url-ref="felixcloutier.com/x86/HADDPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f64"/>
      <architecture name="SNB">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" latency="12" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.1"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.3"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" latency="12" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.1"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.3"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" latency="12" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.1"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="4" uops_indexed="4" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="4" uops_indexed="4" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p01+1*p23+2*p5" ports_indexed="1*p01+1*p23+2*p5" uops="4" uops_indexed="4" version="2.3"/>
        <IACA TP="0.75" TP_indexed="1.00" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="4" uops_indexed="4" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p01+1*p23+2*p5" ports_indexed="1*p01+1*p23+2*p5" uops="4" uops_indexed="4" version="2.3"/>
        <IACA TP="0.75" TP_indexed="1.00" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="4" uops_indexed="4" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="2.0" latency="13.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="4.00" TP_unrolled="4.00" uops="10">
          <latency cycles="7" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="2.00" TP_ports="1.00" TP_unrolled="2.00" ports="1*FP1+1*FP12+1*FP3" uops="4">
          <latency cycles="7" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VHADDPD" category="AVX" cpl="3" extension="AVX" iclass="VHADDPD" iform="VHADDPD_YMMqq_YMMqq_YMMqq" isa-set="AVX" mxcsr="1" string="VHADDPD (YMM, YMM, YMM)" summary="Packed Double-FP Horizontal Add" url="uops.info/html-instr/VHADDPD_YMM_YMM_YMM.html" url-ref="felixcloutier.com/x86/HADDPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="5" ports="1*p1+2*p5" uops="3" version="2.1"/>
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.3"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="5" ports="1*p1+2*p5" uops="3" version="2.1"/>
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.3"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="5" ports="1*p1+2*p5" uops="3" version="2.1"/>
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.3"/>
        <IACA TP="0.95" TP_ports="1.00" ports="1*p1" uops="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.3"/>
        <IACA TP="0.95" TP_ports="1.00" ports="1*p1" uops="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="2.00" TP_ports="2.00" ports="1*p01+2*p5" uops="3" version="2.3"/>
        <IACA TP="0.71" TP_ports="0.50" ports="1*p01" uops="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="2.00" TP_ports="2.00" ports="1*p01+2*p5" uops="3" version="2.3"/>
        <IACA TP="0.71" TP_ports="0.50" ports="1*p01" uops="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="2.0" latency="6.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="3.00" TP_unrolled="3.00" uops="8">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="7" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="3.00" latency="6" uops="ucode"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="4" uops="ucode"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="2.00" TP_ports="1.00" TP_unrolled="2.00" ports="1*FP1+1*FP12+1*FP23" uops="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="4" uops="ucode"/>
      </architecture>
    </instruction>
    <instruction asm="VHADDPS" category="AVX" cpl="3" extension="AVX" iclass="VHADDPS" iform="VHADDPS_XMMdq_XMMdq_MEMdq" isa-set="AVX" mxcsr="1" string="VHADDPS (XMM, XMM, M128)" summary="Packed Single-FP Horizontal Add" url="uops.info/html-instr/VHADDPS_XMM_XMM_M128.html" url-ref="felixcloutier.com/x86/HADDPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f32"/>
      <architecture name="SNB">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" latency="11" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.1"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.3"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" latency="11" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.1"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.3"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" latency="11" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.1"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="4" uops_indexed="4" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="4" uops_indexed="4" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p01+1*p23+2*p5" ports_indexed="1*p01+1*p23+2*p5" uops="4" uops_indexed="4" version="2.3"/>
        <IACA TP="0.75" TP_indexed="1.00" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="4" uops_indexed="4" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p01+1*p23+2*p5" ports_indexed="1*p01+1*p23+2*p5" uops="4" uops_indexed="4" version="2.3"/>
        <IACA TP="0.75" TP_indexed="1.00" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="4" uops_indexed="4" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc latency="12.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="2.00" TP_ports="1.00" TP_unrolled="2.00" ports="1*FP1+1*FP12+1*FP3" uops="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VHADDPS" category="AVX" cpl="3" extension="AVX" iclass="VHADDPS" iform="VHADDPS_XMMdq_XMMdq_XMMdq" isa-set="AVX" mxcsr="1" string="VHADDPS (XMM, XMM, XMM)" summary="Packed Single-FP Horizontal Add" url="uops.info/html-instr/VHADDPS_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/HADDPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="5" ports="1*p1+2*p5" uops="3" version="2.1"/>
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.3"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="5" ports="1*p1+2*p5" uops="3" version="2.1"/>
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.3"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="5" ports="1*p1+2*p5" uops="3" version="2.1"/>
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.3"/>
        <IACA TP="0.95" TP_ports="1.00" ports="1*p1" uops="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.3"/>
        <IACA TP="0.95" TP_ports="1.00" ports="1*p1" uops="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="2.00" TP_ports="2.00" ports="1*p01+2*p5" uops="3" version="2.3"/>
        <IACA TP="0.71" TP_ports="0.50" ports="1*p01" uops="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="2.00" TP_ports="2.00" ports="1*p01+2*p5" uops="3" version="2.3"/>
        <IACA TP="0.71" TP_ports="0.50" ports="1*p01" uops="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="2.00" latency="6" uops="ucode"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="4" uops="ucode"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="2.00" TP_ports="1.00" TP_unrolled="2.00" ports="1*FP1+1*FP12+1*FP23" uops="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="4" uops="ucode"/>
      </architecture>
    </instruction>
    <instruction asm="VHADDPS" category="AVX" cpl="3" extension="AVX" iclass="VHADDPS" iform="VHADDPS_YMMqq_YMMqq_MEMqq" isa-set="AVX" mxcsr="1" string="VHADDPS (YMM, YMM, M256)" summary="Packed Single-FP Horizontal Add" url="uops.info/html-instr/VHADDPS_YMM_YMM_M256.html" url-ref="felixcloutier.com/x86/HADDPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f32"/>
      <architecture name="SNB">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" latency="12" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.1"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.3"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" latency="12" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.1"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.3"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" latency="12" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.1"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="4" uops_indexed="4" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="4" uops_indexed="4" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p01+1*p23+2*p5" ports_indexed="1*p01+1*p23+2*p5" uops="4" uops_indexed="4" version="2.3"/>
        <IACA TP="0.75" TP_indexed="1.00" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="4" uops_indexed="4" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p01+1*p23+2*p5" ports_indexed="1*p01+1*p23+2*p5" uops="4" uops_indexed="4" version="2.3"/>
        <IACA TP="0.75" TP_indexed="1.00" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="4" uops_indexed="4" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc latency="13.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="4.00" TP_unrolled="4.00" uops="11">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="2.00" TP_ports="1.00" TP_unrolled="2.00" ports="1*FP1+1*FP12+1*FP3" uops="4">
          <latency cycles="7" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VHADDPS" category="AVX" cpl="3" extension="AVX" iclass="VHADDPS" iform="VHADDPS_YMMqq_YMMqq_YMMqq" isa-set="AVX" mxcsr="1" string="VHADDPS (YMM, YMM, YMM)" summary="Packed Single-FP Horizontal Add" url="uops.info/html-instr/VHADDPS_YMM_YMM_YMM.html" url-ref="felixcloutier.com/x86/HADDPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="5" ports="1*p1+2*p5" uops="3" version="2.1"/>
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.3"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="5" ports="1*p1+2*p5" uops="3" version="2.1"/>
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.3"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="5" ports="1*p1+2*p5" uops="3" version="2.1"/>
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.3"/>
        <IACA TP="0.95" TP_ports="1.00" ports="1*p1" uops="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.3"/>
        <IACA TP="0.95" TP_ports="1.00" ports="1*p1" uops="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="2.00" TP_ports="2.00" ports="1*p01+2*p5" uops="3" version="2.3"/>
        <IACA TP="0.71" TP_ports="0.50" ports="1*p01" uops="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="2.00" TP_ports="2.00" ports="1*p01+2*p5" uops="3" version="2.3"/>
        <IACA TP="0.71" TP_ports="0.50" ports="1*p01" uops="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="3.00" TP_unrolled="3.00" uops="8">
          <latency cycles="7" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="3.00" latency="6" uops="ucode"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="4" uops="ucode"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="2.00" TP_ports="1.00" TP_unrolled="2.00" ports="1*FP1+1*FP12+1*FP23" uops="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="4" uops="ucode"/>
      </architecture>
    </instruction>
    <instruction asm="VHSUBPD" category="AVX" cpl="3" extension="AVX" iclass="VHSUBPD" iform="VHSUBPD_XMMdq_XMMdq_MEMdq" isa-set="AVX" mxcsr="1" string="VHSUBPD (XMM, XMM, M128)" summary="Packed Double-FP Horizontal Subtract" url="uops.info/html-instr/VHSUBPD_XMM_XMM_M128.html" url-ref="felixcloutier.com/x86/HSUBPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f64"/>
      <architecture name="SNB">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" latency="11" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.1"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.3"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" latency="11" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.1"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.3"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" latency="11" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.1"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="4" uops_indexed="4" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="4" uops_indexed="4" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p01+1*p23+2*p5" ports_indexed="1*p01+1*p23+2*p5" uops="4" uops_indexed="4" version="2.3"/>
        <IACA TP="0.75" TP_indexed="1.00" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="4" uops_indexed="4" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p01+1*p23+2*p5" ports_indexed="1*p01+1*p23+2*p5" uops="4" uops_indexed="4" version="2.3"/>
        <IACA TP="0.75" TP_indexed="1.00" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="4" uops_indexed="4" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="2.0" latency="12.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="2.00" TP_ports="1.00" TP_unrolled="2.00" ports="1*FP1+1*FP12+1*FP3" uops="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VHSUBPD" category="AVX" cpl="3" extension="AVX" iclass="VHSUBPD" iform="VHSUBPD_XMMdq_XMMdq_XMMdq" isa-set="AVX" mxcsr="1" string="VHSUBPD (XMM, XMM, XMM)" summary="Packed Double-FP Horizontal Subtract" url="uops.info/html-instr/VHSUBPD_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/HSUBPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="5" ports="1*p1+2*p5" uops="3" version="2.1"/>
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.3"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="5" ports="1*p1+2*p5" uops="3" version="2.1"/>
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.3"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="5" ports="1*p1+2*p5" uops="3" version="2.1"/>
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.3"/>
        <IACA TP="0.95" TP_ports="1.00" ports="1*p1" uops="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.3"/>
        <IACA TP="0.95" TP_ports="1.00" ports="1*p1" uops="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="2.00" TP_ports="2.00" ports="1*p01+2*p5" uops="3" version="2.3"/>
        <IACA TP="0.71" TP_ports="0.50" ports="1*p01" uops="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="2.00" TP_ports="2.00" ports="1*p01+2*p5" uops="3" version="2.3"/>
        <IACA TP="0.71" TP_ports="0.50" ports="1*p01" uops="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="2.0" latency="6.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="2.00" latency="6" uops="ucode"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="4" uops="ucode"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="2.00" TP_ports="1.00" TP_unrolled="2.00" ports="1*FP1+1*FP12+1*FP23" uops="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="4" uops="ucode"/>
      </architecture>
    </instruction>
    <instruction asm="VHSUBPD" category="AVX" cpl="3" extension="AVX" iclass="VHSUBPD" iform="VHSUBPD_YMMqq_YMMqq_MEMqq" isa-set="AVX" mxcsr="1" string="VHSUBPD (YMM, YMM, M256)" summary="Packed Double-FP Horizontal Subtract" url="uops.info/html-instr/VHSUBPD_YMM_YMM_M256.html" url-ref="felixcloutier.com/x86/HSUBPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f64"/>
      <architecture name="SNB">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" latency="12" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.1"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.3"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" latency="12" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.1"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.3"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" latency="12" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.1"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="4" uops_indexed="4" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="4" uops_indexed="4" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p01+1*p23+2*p5" ports_indexed="1*p01+1*p23+2*p5" uops="4" uops_indexed="4" version="2.3"/>
        <IACA TP="0.75" TP_indexed="1.00" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="4" uops_indexed="4" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p01+1*p23+2*p5" ports_indexed="1*p01+1*p23+2*p5" uops="4" uops_indexed="4" version="2.3"/>
        <IACA TP="0.75" TP_indexed="1.00" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="4" uops_indexed="4" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="2.0" latency="13.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="4.00" TP_unrolled="4.00" uops="10">
          <latency cycles="7" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="2.00" TP_ports="1.00" TP_unrolled="2.00" ports="1*FP1+1*FP12+1*FP3" uops="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VHSUBPD" category="AVX" cpl="3" extension="AVX" iclass="VHSUBPD" iform="VHSUBPD_YMMqq_YMMqq_YMMqq" isa-set="AVX" mxcsr="1" string="VHSUBPD (YMM, YMM, YMM)" summary="Packed Double-FP Horizontal Subtract" url="uops.info/html-instr/VHSUBPD_YMM_YMM_YMM.html" url-ref="felixcloutier.com/x86/HSUBPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="5" ports="1*p1+2*p5" uops="3" version="2.1"/>
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.3"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="5" ports="1*p1+2*p5" uops="3" version="2.1"/>
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.3"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="5" ports="1*p1+2*p5" uops="3" version="2.1"/>
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.3"/>
        <IACA TP="0.95" TP_ports="1.00" ports="1*p1" uops="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.3"/>
        <IACA TP="0.95" TP_ports="1.00" ports="1*p1" uops="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="2.00" TP_ports="2.00" ports="1*p01+2*p5" uops="3" version="2.3"/>
        <IACA TP="0.71" TP_ports="0.50" ports="1*p01" uops="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="2.00" TP_ports="2.00" ports="1*p01+2*p5" uops="3" version="2.3"/>
        <IACA TP="0.71" TP_ports="0.50" ports="1*p01" uops="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="2.0" latency="6.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="3.00" TP_unrolled="3.00" uops="8">
          <latency cycles="7" start_op="2" target_op="1"/>
          <latency cycles="7" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="3.00" latency="6" uops="ucode"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="2.00" latency="4" uops="ucode"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="2.00" TP_ports="1.00" TP_unrolled="2.00" ports="1*FP1+1*FP12+1*FP23" uops="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="4" uops="ucode"/>
      </architecture>
    </instruction>
    <instruction asm="VHSUBPS" category="AVX" cpl="3" extension="AVX" iclass="VHSUBPS" iform="VHSUBPS_XMMdq_XMMdq_MEMdq" isa-set="AVX" mxcsr="1" string="VHSUBPS (XMM, XMM, M128)" summary="Packed Single-FP Horizontal Subtract" url="uops.info/html-instr/VHSUBPS_XMM_XMM_M128.html" url-ref="felixcloutier.com/x86/HSUBPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f32"/>
      <architecture name="SNB">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" latency="11" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.1"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.3"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" latency="11" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.1"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.3"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" latency="11" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.1"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="4" uops_indexed="4" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="4" uops_indexed="4" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p01+1*p23+2*p5" ports_indexed="1*p01+1*p23+2*p5" uops="4" uops_indexed="4" version="2.3"/>
        <IACA TP="0.75" TP_indexed="1.00" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="4" uops_indexed="4" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p01+1*p23+2*p5" ports_indexed="1*p01+1*p23+2*p5" uops="4" uops_indexed="4" version="2.3"/>
        <IACA TP="0.75" TP_indexed="1.00" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="4" uops_indexed="4" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc latency="12.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="2.00" TP_ports="1.00" TP_unrolled="2.00" ports="1*FP1+1*FP12+1*FP3" uops="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VHSUBPS" category="AVX" cpl="3" extension="AVX" iclass="VHSUBPS" iform="VHSUBPS_XMMdq_XMMdq_XMMdq" isa-set="AVX" mxcsr="1" string="VHSUBPS (XMM, XMM, XMM)" summary="Packed Single-FP Horizontal Subtract" url="uops.info/html-instr/VHSUBPS_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/HSUBPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="5" ports="1*p1+2*p5" uops="3" version="2.1"/>
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.3"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="5" ports="1*p1+2*p5" uops="3" version="2.1"/>
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.3"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="5" ports="1*p1+2*p5" uops="3" version="2.1"/>
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.3"/>
        <IACA TP="0.95" TP_ports="1.00" ports="1*p1" uops="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.3"/>
        <IACA TP="0.95" TP_ports="1.00" ports="1*p1" uops="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="2.00" TP_ports="2.00" ports="1*p01+2*p5" uops="3" version="2.3"/>
        <IACA TP="0.71" TP_ports="0.50" ports="1*p01" uops="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="2.00" TP_ports="2.00" ports="1*p01+2*p5" uops="3" version="2.3"/>
        <IACA TP="0.71" TP_ports="0.50" ports="1*p01" uops="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="2.00" latency="6" uops="ucode"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="4" uops="ucode"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="2.00" TP_ports="1.00" TP_unrolled="2.00" ports="1*FP1+1*FP12+1*FP23" uops="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="4" uops="ucode"/>
      </architecture>
    </instruction>
    <instruction asm="VHSUBPS" category="AVX" cpl="3" extension="AVX" iclass="VHSUBPS" iform="VHSUBPS_YMMqq_YMMqq_MEMqq" isa-set="AVX" mxcsr="1" string="VHSUBPS (YMM, YMM, M256)" summary="Packed Single-FP Horizontal Subtract" url="uops.info/html-instr/VHSUBPS_YMM_YMM_M256.html" url-ref="felixcloutier.com/x86/HSUBPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f32"/>
      <architecture name="SNB">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" latency="12" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.1"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.3"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" latency="12" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.1"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.3"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" latency="12" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.1"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="4" uops_indexed="4" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p1+1*p23+2*p5" ports_indexed="1*p1+1*p23+2*p5" uops="4" uops_indexed="4" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="4" uops_indexed="4" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p01+1*p23+2*p5" ports_indexed="1*p01+1*p23+2*p5" uops="4" uops_indexed="4" version="2.3"/>
        <IACA TP="0.75" TP_indexed="1.00" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="4" uops_indexed="4" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p01+1*p23+2*p5" ports_indexed="1*p01+1*p23+2*p5" uops="4" uops_indexed="4" version="2.3"/>
        <IACA TP="0.75" TP_indexed="1.00" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="4" uops_indexed="4" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc latency="13.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="4.00" TP_unrolled="4.00" uops="10">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="2.00" TP_ports="1.00" TP_unrolled="2.00" ports="1*FP1+1*FP12+1*FP3" uops="4">
          <latency cycles="7" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VHSUBPS" category="AVX" cpl="3" extension="AVX" iclass="VHSUBPS" iform="VHSUBPS_YMMqq_YMMqq_YMMqq" isa-set="AVX" mxcsr="1" string="VHSUBPS (YMM, YMM, YMM)" summary="Packed Single-FP Horizontal Subtract" url="uops.info/html-instr/VHSUBPS_YMM_YMM_YMM.html" url-ref="felixcloutier.com/x86/HSUBPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="5" ports="1*p1+2*p5" uops="3" version="2.1"/>
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.3"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="5" ports="1*p1+2*p5" uops="3" version="2.1"/>
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.3"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="5" ports="1*p1+2*p5" uops="3" version="2.1"/>
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.3"/>
        <IACA TP="0.95" TP_ports="1.00" ports="1*p1" uops="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="1*p1+2*p5" uops="3" version="2.3"/>
        <IACA TP="0.95" TP_ports="1.00" ports="1*p1" uops="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p1+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="2.00" TP_ports="2.00" ports="1*p01+2*p5" uops="3" version="2.3"/>
        <IACA TP="0.71" TP_ports="0.50" ports="1*p01" uops="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="2.00" TP_ports="2.00" ports="1*p01+2*p5" uops="3" version="2.3"/>
        <IACA TP="0.71" TP_ports="0.50" ports="1*p01" uops="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="3.00" TP_unrolled="3.00" uops="8">
          <latency cycles="7" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="3.00" latency="6" uops="ucode"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="2.00" latency="4" uops="ucode"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="2.00" TP_ports="1.00" TP_unrolled="2.00" ports="1*FP1+1*FP12+1*FP23" uops="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="4" uops="ucode"/>
      </architecture>
    </instruction>
    <instruction asm="VINSERTF128" category="AVX" cpl="3" extension="AVX" iclass="VINSERTF128" iform="VINSERTF128_YMMqq_YMMqq_MEMdq_IMMb" isa-set="AVX" string="VINSERTF128 (YMM, YMM, M128, I8)" summary="Insert Packed Floating-Point Values" url="uops.info/html-instr/VINSERTF128_YMM_YMM_M128_I8.html" url-ref="felixcloutier.com/x86/VINSERTF128:VINSERTF32x4:VINSERTF64x2:VINSERTF32x8:VINSERTF64x4.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f64"/>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="7" ports="1*p05+1*p23" uops="2" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p05+1*p23" ports_indexed="1*p05+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p05+1*p23" ports_indexed="1*p05+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p05+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="7" ports="1*p05+1*p23" uops="2" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p05+1*p23" ports_indexed="1*p05+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p05+1*p23" ports_indexed="1*p05+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p05+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="7" ports="1*p015+1*p23" uops="2" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p015+1*p23" uops="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p015+1*p23" uops="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VINSERTF128" category="AVX" cpl="3" extension="AVX" iclass="VINSERTF128" iform="VINSERTF128_YMMqq_YMMqq_XMMdq_IMMb" isa-set="AVX" string="VINSERTF128 (YMM, YMM, XMM, I8)" summary="Insert Packed Floating-Point Values" url="uops.info/html-instr/VINSERTF128_YMM_YMM_XMM_I8.html" url-ref="felixcloutier.com/x86/VINSERTF128:VINSERTF32x4:VINSERTF64x2:VINSERTF32x8:VINSERTF64x4.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="2" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="2" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.0" latency="3.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.67" TP_unrolled="0.67" uops="2">
          <latency cycles="3" cycles_same_reg="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.67" latency="1" ports="FP0/1/3, FP0/1/3" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.33" latency="2" ports="FP0/1/3, FP0/1/3" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="FP0" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VINSERTPS" category="AVX" cpl="3" extension="AVX" iclass="VINSERTPS" iform="VINSERTPS_XMMdq_XMMdq_MEMd_IMMb" isa-set="AVX" string="VINSERTPS (XMM, XMM, M32, I8)" summary="Insert Scalar Single-Precision Floating-Point Value" url="uops.info/html-instr/VINSERTPS_XMM_XMM_M32_I8.html" url-ref="felixcloutier.com/x86/INSERTPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="dword ptr" name="MEM0" r="1" type="mem" width="32" xtype="f32"/>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="7" ports="1*p23+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="7" ports="1*p23+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="7" ports="1*p23+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.0" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VINSERTPS" category="AVX" cpl="3" extension="AVX" iclass="VINSERTPS" iform="VINSERTPS_XMMdq_XMMdq_XMMdq_IMMb" isa-set="AVX" string="VINSERTPS (XMM, XMM, XMM, I8)" summary="Insert Scalar Single-Precision Floating-Point Value" url="uops.info/html-instr/VINSERTPS_XMM_XMM_XMM_I8.html" url-ref="felixcloutier.com/x86/INSERTPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.0" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP1/2" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP1/2" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP1/2" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VLDDQU" category="AVX" cpl="3" extension="AVX" iclass="VLDDQU" iform="VLDDQU_XMMdq_MEMdq" isa-set="AVX" string="VLDDQU (XMM, M128)" summary="Load Unaligned Integer 128 Bits" url="uops.info/html-instr/VLDDQU_XMM_M128.html" url-ref="felixcloutier.com/x86/LDDQU.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i32"/>
      <architecture name="SNB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="6" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="6" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="6" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_indexed="0.49" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_indexed="0.49" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_indexed="0.49" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_indexed="0.49" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.5"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" ports="LD" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" ports="LD" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" ports="LD" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VLDDQU" category="AVX" cpl="3" extension="AVX" iclass="VLDDQU" iform="VLDDQU_YMMqq_MEMqq" isa-set="AVX" string="VLDDQU (YMM, M256)" summary="Load Unaligned Integer 128 Bits" url="uops.info/html-instr/VLDDQU_YMM_M256.html" url-ref="felixcloutier.com/x86/LDDQU.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="i32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="i32"/>
      <architecture name="SNB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="6" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="6" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="6" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_indexed="0.49" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_indexed="0.49" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_indexed="0.49" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_indexed="0.49" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" ports="LD" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" ports="LD" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VLDMXCSR" category="AVX" cpl="3" extension="AVX" iclass="VLDMXCSR" iform="VLDMXCSR_MEMd" isa-set="AVX" mxcsr="1" string="VLDMXCSR (M32)" summary="Load MXCSR Register" url="uops.info/html-instr/VLDMXCSR_M32.html" url-ref="felixcloutier.com/x86/LDMXCSR.html" vex="1">
      <operand idx="1" memory-prefix="dword ptr" name="MEM0" r="1" type="mem" width="32" xtype="i32"/>
      <operand idx="2" name="REG0" suppressed="1" type="reg" w="1">MXCSR</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p0+1*p23+1*p4+1*p5" ports_indexed="1*p0+1*p23+1*p4+1*p5" uops="4" uops_indexed="4" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23+1*p4+1*p5" ports_indexed="1*p0+1*p23+1*p4+1*p5" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23+1*p4+1*p5" ports_indexed="1*p0+1*p23+1*p4+1*p5" uops="4" uops_indexed="4" version="2.3"/>
        <measurement TP_loop="2.25" TP_ports="1.00" TP_unrolled="2.30" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p23+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3"/>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p0+1*p23+1*p4+1*p5" ports_indexed="1*p0+1*p23+1*p4+1*p5" uops="4" uops_indexed="4" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23+1*p4+1*p5" ports_indexed="1*p0+1*p23+1*p4+1*p5" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23+1*p4+1*p5" ports_indexed="1*p0+1*p23+1*p4+1*p5" uops="4" uops_indexed="4" version="2.3"/>
        <measurement TP_loop="2.36" TP_ports="1.00" TP_unrolled="2.40" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p23+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3"/>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p0+1*p237+1*p4+1*p5" ports_indexed="1*p0+1*p23+1*p4+1*p5" uops="4" uops_indexed="4" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p0156+1*p23" ports_indexed="1*p0+1*p0156+1*p23" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p0156+1*p23" ports_indexed="1*p0+1*p0156+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p0156+1*p23" ports_indexed="1*p0+1*p0156+1*p23" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="2.25" TP_ports="1.00" TP_unrolled="2.30" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p06+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3"/>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p0156+1*p23" ports_indexed="1*p0+1*p0156+1*p23" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p0156+1*p23" ports_indexed="1*p0+1*p0156+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p0156+1*p23" ports_indexed="1*p0+1*p0156+1*p23" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="2.25" TP_ports="1.00" TP_unrolled="2.30" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p06+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3"/>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p0156+1*p23" ports_indexed="1*p0+1*p0156+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p0156+1*p23" ports_indexed="1*p0+1*p0156+1*p23" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="2.91" TP_ports="1.00" TP_unrolled="3.00" available_simple_decoders="1" complex_decoder="1" ports="1*p0+1*p06+1*p23+1*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4"/>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p0156+1*p23" ports_indexed="1*p0+1*p0156+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p0156+1*p23" ports_indexed="1*p0+1*p0156+1*p23" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="2.91" TP_ports="1.00" TP_unrolled="3.00" available_simple_decoders="1" complex_decoder="1" ports="1*p0+1*p06+1*p23+1*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4"/>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="2.91" TP_ports="1.00" TP_unrolled="3.00" available_simple_decoders="1" complex_decoder="1" ports="1*p0+1*p06+1*p23+1*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4"/>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="2.91" TP_ports="1.00" TP_unrolled="3.00" available_simple_decoders="1" complex_decoder="1" ports="1*p0+1*p06+1*p23+1*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4"/>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="3.20" TP_ports="1.00" TP_unrolled="3.37" available_simple_decoders="1" complex_decoder="1" ports="1*p0+1*p015+1*p06+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4"/>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="2.91" TP_ports="1.00" TP_unrolled="2.62" available_simple_decoders="1" complex_decoder="1" ports="1*p0+1*p06+1*p23+1*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4"/>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="3.38" TP_ports="1.00" TP_unrolled="3.40" available_simple_decoders="1" complex_decoder="1" ports="1*p0+1*p015+1*p06+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="3.38" TP_ports="1.00" TP_unrolled="3.40" available_simple_decoders="1" complex_decoder="1" ports="1*p0+1*p015+1*p06+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4"/>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="3.37" TP_ports="1.00" TP_unrolled="3.40" available_simple_decoders="1" complex_decoder="1" ports="1*p0+1*p015+1*p06+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4"/>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_ports="1.00" TP_unrolled="2.00" ports="1*FP3" uops="1"/>
        <doc uops="ucode"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="2.00" TP_ports="1.00" TP_unrolled="2.00" ports="1*FP3" uops="1"/>
        <doc uops="ucode"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.50" TP_ports="1.00" TP_unrolled="1.50" ports="1*FP0" uops="1"/>
        <doc uops="ucode"/>
      </architecture>
    </instruction>
    <instruction asm="VMASKMOVDQU" category="AVX" cpl="3" extension="AVX" iclass="VMASKMOVDQU" iform="VMASKMOVDQU_XMMdq_XMMdq" isa-set="AVX" string="VMASKMOVDQU (XMM, XMM)" summary="Conditional SIMD Packed Loads and Stores" url="uops.info/html-instr/VMASKMOVDQU_XMM_XMM.html" url-ref="felixcloutier.com/x86/VMASKMOV.html" vex="1">
      <operand idx="1" name="REG0" r="1" type="reg" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand base="RDI" idx="3" memory-prefix="xmmword ptr" name="MEM0" seg="DS" suppressed="1" type="mem" w="1" width="128" xtype="u8"/>
      <architecture name="SNB">
        <measurement TP_loop="6.00" TP_ports="2.00" TP_unrolled="6.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p05+1*p15+4*p23+2*p4+1*p5" uops="10" uops_MITE="0" uops_MS="10" uops_retire_slots="10">
          <latency cycles_addr="22" cycles_addr_is_upper_bound="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <measurement TP_loop="6.00" TP_ports="2.00" TP_unrolled="6.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p05+1*p15+4*p23+2*p4+1*p5" uops="10" uops_MITE="0" uops_MS="10" uops_retire_slots="10">
          <latency cycles_addr="21" cycles_addr_is_upper_bound="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="6.00" TP_ports="2.00" TP_unrolled="6.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+2*p06+4*p23+2*p4+1*p5" uops="10" uops_MITE="0" uops_MS="10" uops_retire_slots="10">
          <latency cycles_addr="20" cycles_addr_is_upper_bound="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="6.00" TP_ports="2.00" TP_unrolled="6.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+2*p06+4*p23+2*p4+1*p5" uops="10" uops_MITE="0" uops_MS="10" uops_retire_slots="10">
          <latency cycles_addr="21" cycles_addr_is_upper_bound="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" fusion_occurred="1" ports="1*p0+1*p237+1*p4" uops="3" version="2.3"/>
        <IACA TP="0.85" TP_ports="1.00" fusion_occurred="1" ports="1*p0+1*p237+1*p4" uops="2" version="3.0"/>
        <measurement TP_loop="6.00" TP_ports="2.00" TP_unrolled="6.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p06+4*p23+2*p4+2*p5" uops="10" uops_MITE="0" uops_MS="10" uops_retire_slots="10">
          <latency cycles_addr="9" cycles_addr_is_upper_bound="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" fusion_occurred="1" ports="1*p0+1*p237+1*p4" uops="3" version="2.3"/>
        <IACA TP="0.85" TP_ports="1.00" fusion_occurred="1" ports="1*p0+1*p237+1*p4" uops="2" version="3.0"/>
        <measurement TP_loop="6.00" TP_ports="2.00" TP_unrolled="6.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p06+4*p23+2*p4+2*p5" uops="10" uops_MITE="0" uops_MS="10" uops_retire_slots="10">
          <latency cycles_addr="9" cycles_addr_is_upper_bound="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="6.00" TP_ports="2.00" TP_unrolled="6.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p06+4*p23+2*p4+2*p5" uops="10" uops_MITE="0" uops_MS="10" uops_retire_slots="10">
          <latency cycles_addr="9" cycles_addr_is_upper_bound="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="6.00" TP_ports="2.00" TP_unrolled="6.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p06+4*p23+2*p4+2*p5" uops="10" uops_MITE="0" uops_MS="10" uops_retire_slots="10">
          <latency cycles_addr="9" cycles_addr_is_upper_bound="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="7.00" TP_ports="2.00" TP_unrolled="7.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p06+4*p23+2*p4+2*p5" uops="10" uops_MITE="0" uops_MS="10" uops_retire_slots="10">
          <latency cycles_addr="9" cycles_addr_is_upper_bound="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="6.00" TP_ports="2.00" TP_unrolled="6.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p06+4*p23+2*p4+2*p5" uops="10" uops_MITE="0" uops_MS="10" uops_retire_slots="10">
          <latency cycles_addr="9" cycles_addr_is_upper_bound="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="7.00" TP_ports="1.00" TP_unrolled="7.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p06+1*p15+2*p23+2*p49+1*p5+2*p78" uops="10" uops_MITE="0" uops_MS="10" uops_retire_slots="10">
          <latency cycles_addr="12" cycles_addr_is_upper_bound="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="7.00" TP_ports="1.00" TP_unrolled="7.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p06+1*p15+2*p23+2*p49+1*p5+2*p78" uops="10" uops_MITE="0" uops_MS="10" uops_retire_slots="10">
          <latency cycles_addr="12" cycles_addr_is_upper_bound="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="7.00" TP_ports="1.00" TP_unrolled="7.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p06+1*p15+2*p23+2*p49+1*p5+2*p78" uops="10" uops_MITE="0" uops_MS="10" uops_retire_slots="10">
          <latency cycles_addr="12" cycles_addr_is_upper_bound="1" start_op="3" target_op="3"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="18.00" TP_unrolled="18.00" uops="60">
          <latency cycles_addr="13" cycles_addr_is_upper_bound="1" start_op="3" target_op="3"/>
        </measurement>
        <doc uops="ucode"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="18.00" TP_unrolled="18.00" uops="60">
          <latency cycles_addr="13" cycles_addr_is_upper_bound="1" start_op="3" target_op="3"/>
        </measurement>
        <doc uops="ucode"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="18.00" TP_ports="4.00" TP_unrolled="18.00" ports="2*FP0123+4*FP1+2*FP12" uops="75">
          <latency cycles_addr="20" cycles_addr_is_upper_bound="1" start_op="3" target_op="3"/>
        </measurement>
        <doc uops="ucode"/>
      </architecture>
    </instruction>
    <instruction asm="VMASKMOVPD" category="AVX" cpl="3" extension="AVX" iclass="VMASKMOVPD" iform="VMASKMOVPD_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VMASKMOVPD (XMM, XMM, M128)" summary="Conditional SIMD Packed Loads and Stores" url="uops.info/html-instr/VMASKMOVPD_XMM_XMM_M128.html" url-ref="felixcloutier.com/x86/VMASKMOV.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f64"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="8" ports="2*p05+1*p23" ports_indexed="2*p05+1*p23" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="2*p05+1*p23" ports_indexed="2*p05+1*p23" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="2*p05+1*p23" ports_indexed="2*p05+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="2*p05+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="8" ports="2*p05+1*p23" ports_indexed="2*p05+1*p23" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="2*p05+1*p23" ports_indexed="2*p05+1*p23" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="2*p05+1*p23" ports_indexed="2*p05+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="2*p05+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" latency="8" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p23+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p23+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="4.00" TP_unrolled="4.00" uops="12">
          <latency cycles="14" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="0" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMASKMOVPD" category="AVX" cpl="3" extension="AVX" iclass="VMASKMOVPD" iform="VMASKMOVPD_YMMqq_YMMqq_MEMqq" isa-set="AVX" string="VMASKMOVPD (YMM, YMM, M256)" summary="Conditional SIMD Packed Loads and Stores" url="uops.info/html-instr/VMASKMOVPD_YMM_YMM_M256.html" url-ref="felixcloutier.com/x86/VMASKMOV.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="u64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f64"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="9" ports="2*p05+1*p23" ports_indexed="2*p05+1*p23" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="2*p05+1*p23" ports_indexed="2*p05+1*p23" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="2*p05+1*p23" ports_indexed="2*p05+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="2*p05+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="9" ports="2*p05+1*p23" ports_indexed="2*p05+1*p23" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="2*p05+1*p23" ports_indexed="2*p05+1*p23" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="2*p05+1*p23" ports_indexed="2*p05+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="2*p05+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" latency="9" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p23+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p23+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="8.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="7.00" TP_unrolled="7.00" uops="24">
          <latency cycles="17" start_op="2" target_op="1"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="3" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="FP0/1" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMASKMOVPD" category="AVX" cpl="3" extension="AVX" iclass="VMASKMOVPD" iform="VMASKMOVPD_MEMdq_XMMdq_XMMdq" isa-set="AVX" string="VMASKMOVPD (M128, XMM, XMM)" summary="Conditional SIMD Packed Loads and Stores" url="uops.info/html-instr/VMASKMOVPD_M128_XMM_XMM.html" url-ref="felixcloutier.com/x86/VMASKMOV.html" vex="1">
      <operand idx="1" memory-prefix="xmmword ptr" name="MEM0" type="mem" w="1" width="128" xtype="f64"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="128" xtype="u64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p01+1*p23+1*p4" ports_indexed="1*p01+1*p23+1*p4" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p01+1*p23+1*p4" ports_indexed="1*p01+1*p23+1*p4" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p01+1*p23+1*p4" ports_indexed="1*p01+1*p23+1*p4" uops="3" uops_indexed="3" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p1+1*p23+1*p4" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles_addr="4" cycles_addr_index="4" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p01+1*p23+1*p4" ports_indexed="1*p01+1*p23+1*p4" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p01+1*p23+1*p4" ports_indexed="1*p01+1*p23+1*p4" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p01+1*p23+1*p4" ports_indexed="1*p01+1*p23+1*p4" uops="3" uops_indexed="3" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p1+1*p23+1*p4" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles_addr="4" cycles_addr_index="4" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p0+1*p15+1*p237+1*p4" ports_indexed="1*p0+1*p15+1*p23+1*p4" uops="4" uops_indexed="4" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p15+1*p237+1*p4" ports_indexed="1*p0+1*p15+1*p23+1*p4" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p15+1*p237+1*p4" ports_indexed="1*p0+1*p15+1*p23+1*p4" uops="4" uops_indexed="4" version="2.3"/>
        <IACA TP="0.92" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p15+1*p237+1*p4" ports_indexed="1*p0+1*p15+1*p23+1*p4" uops="4" uops_indexed="4" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p1+1*p23+1*p4" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles_addr="4" cycles_addr_index="4" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p15+1*p237+1*p4" ports_indexed="1*p0+1*p15+1*p23+1*p4" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p15+1*p237+1*p4" ports_indexed="1*p0+1*p15+1*p23+1*p4" uops="4" uops_indexed="4" version="2.3"/>
        <IACA TP="0.92" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p15+1*p237+1*p4" ports_indexed="1*p0+1*p15+1*p23+1*p4" uops="4" uops_indexed="4" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p1+1*p23+1*p4" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles_addr="4" cycles_addr_index="4" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p237+1*p4" ports_indexed="1*p0+1*p23+1*p4" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="0.85" TP_indexed="0.76" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p237+1*p4" ports_indexed="1*p0+1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p23+1*p4" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="14" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p237+1*p4" ports_indexed="1*p0+1*p23+1*p4" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="0.85" TP_indexed="0.76" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p237+1*p4" ports_indexed="1*p0+1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p23+1*p4" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="14" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p23+1*p4" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="14" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p23+1*p4" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="14" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p23+1*p4" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="14" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p23+1*p4" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="14" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p49+1*p78" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="14" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p49+1*p78" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="14" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p49+1*p78" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="14" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="4.00" TP_unrolled="4.00" uops="10">
          <latency cycles_addr="3" cycles_addr_index="3" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="4.00" TP_unrolled="4.00" uops="10">
          <latency cycles_addr="4" cycles_addr_index="4" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="4.00" TP_ports="1.00" TP_unrolled="4.00" ports="1*FP1" uops="10">
          <latency cycles_addr="3" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMASKMOVPD" category="AVX" cpl="3" extension="AVX" iclass="VMASKMOVPD" iform="VMASKMOVPD_MEMqq_YMMqq_YMMqq" isa-set="AVX" string="VMASKMOVPD (M256, YMM, YMM)" summary="Conditional SIMD Packed Loads and Stores" url="uops.info/html-instr/VMASKMOVPD_M256_YMM_YMM.html" url-ref="felixcloutier.com/x86/VMASKMOV.html" vex="1">
      <operand idx="1" memory-prefix="ymmword ptr" name="MEM0" type="mem" w="1" width="256" xtype="f64"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="256" xtype="u64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" latency="5" ports="1*p01+1*p23+2*p4" ports_indexed="1*p01+1*p23+2*p4" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p01+1*p23+2*p4" ports_indexed="1*p01+1*p23+2*p4" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p01+1*p23+2*p4" ports_indexed="1*p01+1*p23+2*p4" uops="3" uops_indexed="3" version="2.3"/>
        <measurement TP_loop="2.00" TP_ports="1.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p1+1*p23+1*p4" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles_addr="4" cycles_addr_index="4" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" latency="5" ports="1*p01+1*p23+2*p4" ports_indexed="1*p01+1*p23+2*p4" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p01+1*p23+2*p4" ports_indexed="1*p01+1*p23+2*p4" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p01+1*p23+2*p4" ports_indexed="1*p01+1*p23+2*p4" uops="3" uops_indexed="3" version="2.3"/>
        <measurement TP_loop="2.00" TP_ports="1.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p1+1*p23+1*p4" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles_addr="4" cycles_addr_index="4" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p0+1*p15+1*p237+1*p4" ports_indexed="1*p0+1*p15+1*p23+1*p4" uops="4" uops_indexed="4" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p15+1*p237+1*p4" ports_indexed="1*p0+1*p15+1*p23+1*p4" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p15+1*p237+1*p4" ports_indexed="1*p0+1*p15+1*p23+1*p4" uops="4" uops_indexed="4" version="2.3"/>
        <IACA TP="0.92" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p15+1*p237+1*p4" ports_indexed="1*p0+1*p15+1*p23+1*p4" uops="4" uops_indexed="4" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p1+1*p23+1*p4" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles_addr="4" cycles_addr_index="4" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p15+1*p237+1*p4" ports_indexed="1*p0+1*p15+1*p23+1*p4" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p15+1*p237+1*p4" ports_indexed="1*p0+1*p15+1*p23+1*p4" uops="4" uops_indexed="4" version="2.3"/>
        <IACA TP="0.92" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p15+1*p237+1*p4" ports_indexed="1*p0+1*p15+1*p23+1*p4" uops="4" uops_indexed="4" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p1+1*p23+1*p4" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles_addr="4" cycles_addr_index="4" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p237+1*p4" ports_indexed="1*p0+1*p23+1*p4" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="0.85" TP_indexed="0.76" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p237+1*p4" ports_indexed="1*p0+1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p23+1*p4" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="14" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p237+1*p4" ports_indexed="1*p0+1*p23+1*p4" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="0.85" TP_indexed="0.76" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p237+1*p4" ports_indexed="1*p0+1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p23+1*p4" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="14" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p23+1*p4" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="14" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p23+1*p4" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="14" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p23+1*p4" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="14" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p23+1*p4" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="14" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p49+1*p78" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="14" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p49+1*p78" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="14" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p49+1*p78" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="14" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="6.00" TP_unrolled="6.00" uops="18">
          <latency cycles_addr="3" cycles_addr_index="3" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="6.00" TP_unrolled="6.00" uops="19">
          <latency cycles_addr="4" cycles_addr_index="4" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="6.00" TP_ports="3.00" TP_unrolled="6.00" ports="3*FP1" uops="18">
          <latency cycles_addr="3" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMASKMOVPS" category="AVX" cpl="3" extension="AVX" iclass="VMASKMOVPS" iform="VMASKMOVPS_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VMASKMOVPS (XMM, XMM, M128)" summary="Conditional SIMD Packed Loads and Stores" url="uops.info/html-instr/VMASKMOVPS_XMM_XMM_M128.html" url-ref="felixcloutier.com/x86/VMASKMOV.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f32"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="8" ports="2*p05+1*p23" ports_indexed="2*p05+1*p23" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="2*p05+1*p23" ports_indexed="2*p05+1*p23" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="2*p05+1*p23" ports_indexed="2*p05+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="2*p05+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="8" ports="2*p05+1*p23" ports_indexed="2*p05+1*p23" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="2*p05+1*p23" ports_indexed="2*p05+1*p23" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="2*p05+1*p23" ports_indexed="2*p05+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="2*p05+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" latency="8" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p23+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p23+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="6.00" TP_unrolled="6.00" uops="20">
          <latency cycles="16" start_op="2" target_op="1"/>
          <latency cycles_addr="15" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="0" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMASKMOVPS" category="AVX" cpl="3" extension="AVX" iclass="VMASKMOVPS" iform="VMASKMOVPS_YMMqq_YMMqq_MEMqq" isa-set="AVX" string="VMASKMOVPS (YMM, YMM, M256)" summary="Conditional SIMD Packed Loads and Stores" url="uops.info/html-instr/VMASKMOVPS_YMM_YMM_M256.html" url-ref="felixcloutier.com/x86/VMASKMOV.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="i32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f32"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="9" ports="2*p05+1*p23" ports_indexed="2*p05+1*p23" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="2*p05+1*p23" ports_indexed="2*p05+1*p23" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="2*p05+1*p23" ports_indexed="2*p05+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="2*p05+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="9" ports="2*p05+1*p23" ports_indexed="2*p05+1*p23" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="2*p05+1*p23" ports_indexed="2*p05+1*p23" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="2*p05+1*p23" ports_indexed="2*p05+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="2*p05+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" latency="9" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p23+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p23+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="8.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="10.00" TP_unrolled="10.00" uops="36">
          <latency cycles="20" start_op="2" target_op="1"/>
          <latency cycles_addr="16" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="FP0/1" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMASKMOVPS" category="AVX" cpl="3" extension="AVX" iclass="VMASKMOVPS" iform="VMASKMOVPS_MEMdq_XMMdq_XMMdq" isa-set="AVX" string="VMASKMOVPS (M128, XMM, XMM)" summary="Conditional SIMD Packed Loads and Stores" url="uops.info/html-instr/VMASKMOVPS_M128_XMM_XMM.html" url-ref="felixcloutier.com/x86/VMASKMOV.html" vex="1">
      <operand idx="1" memory-prefix="xmmword ptr" name="MEM0" type="mem" w="1" width="128" xtype="f32"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p01+1*p23+1*p4" ports_indexed="1*p01+1*p23+1*p4" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p01+1*p23+1*p4" ports_indexed="1*p01+1*p23+1*p4" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p01+1*p23+1*p4" ports_indexed="1*p01+1*p23+1*p4" uops="3" uops_indexed="3" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p1+1*p23+1*p4" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles_addr="4" cycles_addr_index="4" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p01+1*p23+1*p4" ports_indexed="1*p01+1*p23+1*p4" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p01+1*p23+1*p4" ports_indexed="1*p01+1*p23+1*p4" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p01+1*p23+1*p4" ports_indexed="1*p01+1*p23+1*p4" uops="3" uops_indexed="3" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p1+1*p23+1*p4" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles_addr="4" cycles_addr_index="4" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p0+1*p15+1*p237+1*p4" ports_indexed="1*p0+1*p15+1*p23+1*p4" uops="4" uops_indexed="4" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p15+1*p237+1*p4" ports_indexed="1*p0+1*p15+1*p23+1*p4" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p15+1*p237+1*p4" ports_indexed="1*p0+1*p15+1*p23+1*p4" uops="4" uops_indexed="4" version="2.3"/>
        <IACA TP="0.92" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p15+1*p237+1*p4" ports_indexed="1*p0+1*p15+1*p23+1*p4" uops="4" uops_indexed="4" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p1+1*p23+1*p4" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles_addr="4" cycles_addr_index="4" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p15+1*p237+1*p4" ports_indexed="1*p0+1*p15+1*p23+1*p4" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p15+1*p237+1*p4" ports_indexed="1*p0+1*p15+1*p23+1*p4" uops="4" uops_indexed="4" version="2.3"/>
        <IACA TP="0.92" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p15+1*p237+1*p4" ports_indexed="1*p0+1*p15+1*p23+1*p4" uops="4" uops_indexed="4" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p1+1*p23+1*p4" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles_addr="4" cycles_addr_index="4" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p237+1*p4" ports_indexed="1*p0+1*p23+1*p4" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="0.85" TP_indexed="0.76" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p237+1*p4" ports_indexed="1*p0+1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p23+1*p4" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="14" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p237+1*p4" ports_indexed="1*p0+1*p23+1*p4" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="0.85" TP_indexed="0.76" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p237+1*p4" ports_indexed="1*p0+1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p23+1*p4" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="14" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p23+1*p4" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="14" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p23+1*p4" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="14" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p23+1*p4" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="14" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p23+1*p4" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="14" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p49+1*p78" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="14" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p49+1*p78" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="14" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p49+1*p78" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="14" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="6.00" TP_unrolled="6.00" uops="19">
          <latency cycles_addr="3" cycles_addr_index="3" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="1" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="6.00" TP_unrolled="6.00" uops="19">
          <latency cycles_addr="4" cycles_addr_index="4" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="6.00" TP_ports="3.00" TP_unrolled="6.00" ports="3*FP1" uops="18">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMASKMOVPS" category="AVX" cpl="3" extension="AVX" iclass="VMASKMOVPS" iform="VMASKMOVPS_MEMqq_YMMqq_YMMqq" isa-set="AVX" string="VMASKMOVPS (M256, YMM, YMM)" summary="Conditional SIMD Packed Loads and Stores" url="uops.info/html-instr/VMASKMOVPS_M256_YMM_YMM.html" url-ref="felixcloutier.com/x86/VMASKMOV.html" vex="1">
      <operand idx="1" memory-prefix="ymmword ptr" name="MEM0" type="mem" w="1" width="256" xtype="f32"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="256" xtype="i32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" latency="5" ports="1*p01+1*p23+2*p4" ports_indexed="1*p01+1*p23+2*p4" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p01+1*p23+2*p4" ports_indexed="1*p01+1*p23+2*p4" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p01+1*p23+2*p4" ports_indexed="1*p01+1*p23+2*p4" uops="3" uops_indexed="3" version="2.3"/>
        <measurement TP_loop="2.00" TP_ports="1.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p1+1*p23+1*p4" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles_addr="4" cycles_addr_index="4" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" latency="5" ports="1*p01+1*p23+2*p4" ports_indexed="1*p01+1*p23+2*p4" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p01+1*p23+2*p4" ports_indexed="1*p01+1*p23+2*p4" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p01+1*p23+2*p4" ports_indexed="1*p01+1*p23+2*p4" uops="3" uops_indexed="3" version="2.3"/>
        <measurement TP_loop="2.00" TP_ports="1.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p1+1*p23+1*p4" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles_addr="4" cycles_addr_index="4" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p0+1*p15+1*p237+1*p4" ports_indexed="1*p0+1*p15+1*p23+1*p4" uops="4" uops_indexed="4" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p15+1*p237+1*p4" ports_indexed="1*p0+1*p15+1*p23+1*p4" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p15+1*p237+1*p4" ports_indexed="1*p0+1*p15+1*p23+1*p4" uops="4" uops_indexed="4" version="2.3"/>
        <IACA TP="0.92" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p15+1*p237+1*p4" ports_indexed="1*p0+1*p15+1*p23+1*p4" uops="4" uops_indexed="4" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p1+1*p23+1*p4" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles_addr="4" cycles_addr_index="4" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p15+1*p237+1*p4" ports_indexed="1*p0+1*p15+1*p23+1*p4" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p15+1*p237+1*p4" ports_indexed="1*p0+1*p15+1*p23+1*p4" uops="4" uops_indexed="4" version="2.3"/>
        <IACA TP="0.92" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p15+1*p237+1*p4" ports_indexed="1*p0+1*p15+1*p23+1*p4" uops="4" uops_indexed="4" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p1+1*p23+1*p4" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles_addr="4" cycles_addr_index="4" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p237+1*p4" ports_indexed="1*p0+1*p23+1*p4" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="0.85" TP_indexed="0.76" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p237+1*p4" ports_indexed="1*p0+1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p23+1*p4" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="14" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p237+1*p4" ports_indexed="1*p0+1*p23+1*p4" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="0.85" TP_indexed="0.76" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p237+1*p4" ports_indexed="1*p0+1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p23+1*p4" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="14" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p23+1*p4" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="14" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p23+1*p4" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="14" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p23+1*p4" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="14" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p23+1*p4" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="14" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p49+1*p78" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="14" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p49+1*p78" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="14" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p49+1*p78" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="14" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="12.00" TP_unrolled="12.00" uops="42">
          <latency cycles_addr="4" cycles_addr_index="4" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="6" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="12.00" TP_unrolled="12.00" uops="44">
          <latency cycles_addr="4" cycles_addr_index="4" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="2" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="12.00" TP_ports="4.00" TP_unrolled="12.00" ports="1*FP0123+4*FP1+2*FP12" uops="42">
          <latency cycles_addr="7" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
          <latency cycles="0" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMAXPD" category="AVX" cpl="3" extension="AVX" iclass="VMAXPD" iform="VMAXPD_XMMdq_XMMdq_MEMdq" isa-set="AVX" mxcsr="1" string="VMAXPD (XMM, XMM, M128)" summary="Maximum of Packed Double-Precision Floating-Point Values" url="uops.info/html-instr/VMAXPD_XMM_XMM_M128.html" url-ref="felixcloutier.com/x86/MAXPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f64"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="9" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="9" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="9" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.55" TP_unrolled_indexed="0.55" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.55" TP_unrolled_indexed="0.55" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="10.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMAXPD" category="AVX" cpl="3" extension="AVX" iclass="VMAXPD" iform="VMAXPD_XMMdq_XMMdq_XMMdq" isa-set="AVX" mxcsr="1" string="VMAXPD (XMM, XMM, XMM)" summary="Maximum of Packed Double-Precision Floating-Point Values" url="uops.info/html-instr/VMAXPD_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/MAXPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMAXPD" category="AVX" cpl="3" extension="AVX" iclass="VMAXPD" iform="VMAXPD_YMMqq_YMMqq_MEMqq" isa-set="AVX" mxcsr="1" string="VMAXPD (YMM, YMM, M256)" summary="Maximum of Packed Double-Precision Floating-Point Values" url="uops.info/html-instr/VMAXPD_YMM_YMM_M256.html" url-ref="felixcloutier.com/x86/MAXPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f64"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.55" TP_unrolled_indexed="0.55" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.55" TP_unrolled_indexed="0.55" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="11.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <!--
      VMAXPD (YMM, YMM, YMM): all-register, VEX-encoded AVX form (vex="1"); every
      operand is a 256-bit register with xtype f64.
      Operand 1 is marked write-only (w="1"); operands 2 and 3 are marked read-only (r="1").
      Each <architecture> child holds the results recorded for one microarchitecture:
        * zero or more <IACA> entries, one per IACA tool version;
        * one <measurement> entry whose <latency> children give cycles from a source
          operand (start_op) to the destination operand (target_op);
        * on some architectures a <doc> entry (presumably values taken from vendor
          documentation; confirm against the data set's schema notes).
      NOTE(review): for ZEN3 the <doc> entry lists ports="FP0/1" while the measurement
      reports ports="1*FP23". This looks like a documentation/measurement disagreement
      in the upstream data rather than a typo here; confirm before relying on either.
    -->
    <instruction asm="VMAXPD" category="AVX" cpl="3" extension="AVX" iclass="VMAXPD" iform="VMAXPD_YMMqq_YMMqq_YMMqq" isa-set="AVX" mxcsr="1" string="VMAXPD (YMM, YMM, YMM)" summary="Maximum of Packed Double-Precision Floating-Point Values" url="uops.info/html-instr/VMAXPD_YMM_YMM_YMM.html" url-ref="felixcloutier.com/x86/MAXPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="FP0/1" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
    </instruction>
    <!--
      VMAXPS (XMM, XMM, M128): VEX-encoded AVX form (vex="1") whose third operand is a
      128-bit memory source (type="mem", memory-prefix "xmmword ptr"); operands 1 and 2
      are 128-bit registers. All operands have xtype f32.
      Operand 1 is marked write-only (w="1"); operands 2 and 3 are marked read-only (r="1").
      Because operand 3 is memory, its <latency> entries (start_op="3") do not carry a
      single cycles value. They carry cycles_addr, cycles_addr_index and cycles_mem, each
      flagged with a matching *_is_upper_bound="1" attribute.
      Presumably cycles_addr / cycles_addr_index are measured from the address base /
      index register and cycles_mem from the memory contents; confirm against the data
      set's schema notes.
      The SNB through RKL entries additionally carry *_indexed attributes (TP_indexed,
      ports_indexed, uops_indexed, ...); presumably these describe the same instruction
      with an indexed addressing mode (TODO confirm). The ZEN+, ZEN2 and ZEN3 entries
      in this record have no *_indexed attributes and no uops_MITE / uops_MS /
      uops_retire_slots attributes.
    -->
    <instruction asm="VMAXPS" category="AVX" cpl="3" extension="AVX" iclass="VMAXPS" iform="VMAXPS_XMMdq_XMMdq_MEMdq" isa-set="AVX" mxcsr="1" string="VMAXPS (XMM, XMM, M128)" summary="Maximum of Packed Single-Precision Floating-Point Values" url="uops.info/html-instr/VMAXPS_XMM_XMM_M128.html" url-ref="felixcloutier.com/x86/MAXPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f32"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="9" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="9" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="9" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.54" TP_unrolled_indexed="0.55" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.54" TP_unrolled_indexed="0.55" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="10.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMAXPS" category="AVX" cpl="3" extension="AVX" iclass="VMAXPS" iform="VMAXPS_XMMdq_XMMdq_XMMdq" isa-set="AVX" mxcsr="1" string="VMAXPS (XMM, XMM, XMM)" summary="Maximum of Packed Single-Precision Floating-Point Values" url="uops.info/html-instr/VMAXPS_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/MAXPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMAXPS" category="AVX" cpl="3" extension="AVX" iclass="VMAXPS" iform="VMAXPS_YMMqq_YMMqq_MEMqq" isa-set="AVX" mxcsr="1" string="VMAXPS (YMM, YMM, M256)" summary="Maximum of Packed Single-Precision Floating-Point Values" url="uops.info/html-instr/VMAXPS_YMM_YMM_M256.html" url-ref="felixcloutier.com/x86/MAXPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f32"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.55" TP_unrolled_indexed="0.55" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.54" TP_unrolled_indexed="0.55" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="11.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMAXPS" category="AVX" cpl="3" extension="AVX" iclass="VMAXPS" iform="VMAXPS_YMMqq_YMMqq_YMMqq" isa-set="AVX" mxcsr="1" string="VMAXPS (YMM, YMM, YMM)" summary="Maximum of Packed Single-Precision Floating-Point Values" url="uops.info/html-instr/VMAXPS_YMM_YMM_YMM.html" url-ref="felixcloutier.com/x86/MAXPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="FP0/1" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMAXSD" category="AVX" cpl="3" extension="AVX" iclass="VMAXSD" iform="VMAXSD_XMMdq_XMMdq_MEMq" isa-set="AVX" mxcsr="1" string="VMAXSD (XMM, XMM, M64)" summary="Return Maximum Scalar Double-Precision Floating-Point Value" url="uops.info/html-instr/VMAXSD_XMM_XMM_M64.html" url-ref="felixcloutier.com/x86/MAXSD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="f64"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="9" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="9" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="9" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.55" TP_unrolled_indexed="0.55" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.54" TP_unrolled_indexed="0.55" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="10.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMAXSD" category="AVX" cpl="3" extension="AVX" iclass="VMAXSD" iform="VMAXSD_XMMdq_XMMdq_XMMq" isa-set="AVX" mxcsr="1" string="VMAXSD (XMM, XMM, XMM)" summary="Return Maximum Scalar Double-Precision Floating-Point Value" url="uops.info/html-instr/VMAXSD_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/MAXSD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="64" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMAXSS" category="AVX" cpl="3" extension="AVX" iclass="VMAXSS" iform="VMAXSS_XMMdq_XMMdq_MEMd" isa-set="AVX" mxcsr="1" string="VMAXSS (XMM, XMM, M32)" summary="Return Maximum Scalar Single-Precision Floating-Point Value" url="uops.info/html-instr/VMAXSS_XMM_XMM_M32.html" url-ref="felixcloutier.com/x86/MAXSS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="dword ptr" name="MEM0" r="1" type="mem" width="32" xtype="f32"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="9" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="9" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="9" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.55" TP_unrolled_indexed="0.55" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.54" TP_unrolled_indexed="0.55" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="10.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMAXSS" category="AVX" cpl="3" extension="AVX" iclass="VMAXSS" iform="VMAXSS_XMMdq_XMMdq_XMMd" isa-set="AVX" mxcsr="1" string="VMAXSS (XMM, XMM, XMM)" summary="Return Maximum Scalar Single-Precision Floating-Point Value" url="uops.info/html-instr/VMAXSS_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/MAXSS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="32" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMINPD" category="AVX" cpl="3" extension="AVX" iclass="VMINPD" iform="VMINPD_XMMdq_XMMdq_MEMdq" isa-set="AVX" mxcsr="1" string="VMINPD (XMM, XMM, M128)" summary="Minimum of Packed Double-Precision Floating-Point Values" url="uops.info/html-instr/VMINPD_XMM_XMM_M128.html" url-ref="felixcloutier.com/x86/MINPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f64"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="9" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="9" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="9" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.55" TP_unrolled_indexed="0.55" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.55" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="10.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMINPD" category="AVX" cpl="3" extension="AVX" iclass="VMINPD" iform="VMINPD_XMMdq_XMMdq_XMMdq" isa-set="AVX" mxcsr="1" string="VMINPD (XMM, XMM, XMM)" summary="Minimum of Packed Double-Precision Floating-Point Values" url="uops.info/html-instr/VMINPD_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/MINPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMINPD" category="AVX" cpl="3" extension="AVX" iclass="VMINPD" iform="VMINPD_YMMqq_YMMqq_MEMqq" isa-set="AVX" mxcsr="1" string="VMINPD (YMM, YMM, M256)" summary="Minimum of Packed Double-Precision Floating-Point Values" url="uops.info/html-instr/VMINPD_YMM_YMM_M256.html" url-ref="felixcloutier.com/x86/MINPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f64"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.55" TP_unrolled_indexed="0.54" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.55" TP_unrolled_indexed="0.55" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="11.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMINPD" category="AVX" cpl="3" extension="AVX" iclass="VMINPD" iform="VMINPD_YMMqq_YMMqq_YMMqq" isa-set="AVX" mxcsr="1" string="VMINPD (YMM, YMM, YMM)" summary="Minimum of Packed Double-Precision Floating-Point Values" url="uops.info/html-instr/VMINPD_YMM_YMM_YMM.html" url-ref="felixcloutier.com/x86/MINPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="FP0/1" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMINPS" category="AVX" cpl="3" extension="AVX" iclass="VMINPS" iform="VMINPS_XMMdq_XMMdq_MEMdq" isa-set="AVX" mxcsr="1" string="VMINPS (XMM, XMM, M128)" summary="Minimum of Packed Single-Precision Floating-Point Values" url="uops.info/html-instr/VMINPS_XMM_XMM_M128.html" url-ref="felixcloutier.com/x86/MINPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f32"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="9" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="9" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="9" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.55" TP_unrolled_indexed="0.55" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.55" TP_unrolled_indexed="0.55" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="10.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMINPS" category="AVX" cpl="3" extension="AVX" iclass="VMINPS" iform="VMINPS_XMMdq_XMMdq_XMMdq" isa-set="AVX" mxcsr="1" string="VMINPS (XMM, XMM, XMM)" summary="Minimum of Packed Single-Precision Floating-Point Values" url="uops.info/html-instr/VMINPS_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/MINPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMINPS" category="AVX" cpl="3" extension="AVX" iclass="VMINPS" iform="VMINPS_YMMqq_YMMqq_MEMqq" isa-set="AVX" mxcsr="1" string="VMINPS (YMM, YMM, M256)" summary="Minimum of Packed Single-Precision Floating-Point Values" url="uops.info/html-instr/VMINPS_YMM_YMM_M256.html" url-ref="felixcloutier.com/x86/MINPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f32"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.55" TP_unrolled_indexed="0.55" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.54" TP_unrolled_indexed="0.54" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="11.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMINPS" category="AVX" cpl="3" extension="AVX" iclass="VMINPS" iform="VMINPS_YMMqq_YMMqq_YMMqq" isa-set="AVX" mxcsr="1" string="VMINPS (YMM, YMM, YMM)" summary="Minimum of Packed Single-Precision Floating-Point Values" url="uops.info/html-instr/VMINPS_YMM_YMM_YMM.html" url-ref="felixcloutier.com/x86/MINPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="FP0/1" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMINSD" category="AVX" cpl="3" extension="AVX" iclass="VMINSD" iform="VMINSD_XMMdq_XMMdq_MEMq" isa-set="AVX" mxcsr="1" string="VMINSD (XMM, XMM, M64)" summary="Return Minimum Scalar Double-Precision Floating-Point Value" url="uops.info/html-instr/VMINSD_XMM_XMM_M64.html" url-ref="felixcloutier.com/x86/MINSD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="f64"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="9" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="9" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="9" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.55" TP_unrolled_indexed="0.54" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.54" TP_unrolled_indexed="0.54" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="10.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMINSD" category="AVX" cpl="3" extension="AVX" iclass="VMINSD" iform="VMINSD_XMMdq_XMMdq_XMMq" isa-set="AVX" mxcsr="1" string="VMINSD (XMM, XMM, XMM)" summary="Return Minimum Scalar Double-Precision Floating-Point Value" url="uops.info/html-instr/VMINSD_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/MINSD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="64" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMINSS" category="AVX" cpl="3" extension="AVX" iclass="VMINSS" iform="VMINSS_XMMdq_XMMdq_MEMd" isa-set="AVX" mxcsr="1" string="VMINSS (XMM, XMM, M32)" summary="Return Minimum Scalar Single-Precision Floating-Point Value" url="uops.info/html-instr/VMINSS_XMM_XMM_M32.html" url-ref="felixcloutier.com/x86/MINSS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="dword ptr" name="MEM0" r="1" type="mem" width="32" xtype="f32"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="9" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="9" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="9" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p1+1*p23" ports_indexed="1*p1+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.55" TP_unrolled_indexed="0.55" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.55" TP_unrolled_indexed="0.54" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="10.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMINSS" category="AVX" cpl="3" extension="AVX" iclass="VMINSS" iform="VMINSS_XMMdq_XMMdq_XMMd" isa-set="AVX" mxcsr="1" string="VMINSS (XMM, XMM, XMM)" summary="Return Minimum Scalar Single-Precision Floating-Point Value" url="uops.info/html-instr/VMINSS_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/MINSS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="32" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p1" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p1" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p1" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p1" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP23" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMOVAPD" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVAPD" iform="VMOVAPD_XMMdq_MEMdq" isa-set="AVX" string="VMOVAPD (XMM, M128)" summary="Move Aligned Packed Double-Precision Floating-Point Values" url="uops.info/html-instr/VMOVAPD_XMM_M128.html" url-ref="felixcloutier.com/x86/MOVAPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f64"/>
      <architecture name="SNB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="6" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="6" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="6" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="6.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="{load} VMOVAPD" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVAPD" iform="VMOVAPD_XMMdq_XMMdq_28" isa-set="AVX" string="VMOVAPD_28 (XMM, XMM)" summary="Move Aligned Packed Double-Precision Floating-Point Values" url="uops.info/html-instr/VMOVAPD_28_XMM_XMM.html" url-ref="felixcloutier.com/x86/MOVAPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.25" TP_no_interiteration="0.24" latency="1" uops="0" version="2.1"/>
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <measurement TP_loop="0.75" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.75" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.25" TP_no_interiteration="0.24" latency="1" uops="0" version="2.1"/>
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.56" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.62" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.56" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.62" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.20" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.20" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.20" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.25" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.25" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.17" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FP0/1/2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMOVAPD" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVAPD" iform="VMOVAPD_MEMdq_XMMdq" isa-set="AVX" string="VMOVAPD (M128, XMM)" summary="Move Aligned Packed Double-Precision Floating-Point Values" url="uops.info/html-instr/VMOVAPD_M128_XMM.html" url-ref="felixcloutier.com/x86/MOVAPD.html" vex="1">
      <operand idx="1" memory-prefix="xmmword ptr" name="MEM0" type="mem" w="1" width="128" xtype="f64"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.91" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.90" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.92" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.92" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP45" uops="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="8" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="{store} VMOVAPD" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVAPD" iform="VMOVAPD_XMMdq_XMMdq_29" isa-set="AVX" string="VMOVAPD_29 (XMM, XMM)" summary="Move Aligned Packed Double-Precision Floating-Point Values" url="uops.info/html-instr/VMOVAPD_29_XMM_XMM.html" url-ref="felixcloutier.com/x86/MOVAPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.25" TP_no_interiteration="0.24" latency="1" uops="0" version="2.1"/>
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <measurement TP_loop="0.75" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.75" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.25" TP_no_interiteration="0.24" latency="1" uops="0" version="2.1"/>
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.56" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.62" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.56" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.62" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.20" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.20" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.20" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.25" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.25" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.17" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FP0/1/2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMOVAPD" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVAPD" iform="VMOVAPD_YMMqq_MEMqq" isa-set="AVX" string="VMOVAPD (YMM, M256)" summary="Move Aligned Packed Double-Precision Floating-Point Values" url="uops.info/html-instr/VMOVAPD_YMM_M256.html" url-ref="felixcloutier.com/x86/MOVAPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f64"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="0.50" latency="7" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="0.50" latency="7" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="7" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="{load} VMOVAPD" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVAPD" iform="VMOVAPD_YMMqq_YMMqq_28" isa-set="AVX" string="VMOVAPD_28 (YMM, YMM)" summary="Move Aligned Packed Double-Precision Floating-Point Values" url="uops.info/html-instr/VMOVAPD_28_YMM_YMM.html" url-ref="felixcloutier.com/x86/MOVAPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.25" TP_no_interiteration="0.24" latency="1" uops="0" version="2.1"/>
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <measurement TP_loop="0.75" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.75" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.25" TP_no_interiteration="0.24" latency="1" uops="0" version="2.1"/>
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.56" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.62" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.56" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.62" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.20" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.20" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.20" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="2">
          <latency cycles="3" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FPU" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.25" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.17" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FP0/1/2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMOVAPD" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVAPD" iform="VMOVAPD_MEMqq_YMMqq" isa-set="AVX" string="VMOVAPD (M256, YMM)" summary="Move Aligned Packed Double-Precision Floating-Point Values" url="uops.info/html-instr/VMOVAPD_M256_YMM.html" url-ref="felixcloutier.com/x86/MOVAPD.html" vex="1">
      <operand idx="1" memory-prefix="ymmword ptr" name="MEM0" type="mem" w="1" width="256" xtype="f64"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" latency="5" ports="1*p23+2*p4" ports_indexed="1*p23+2*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p4" ports_indexed="1*p23+2*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p4" ports_indexed="1*p23+2*p4" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="2.00" TP_loop_indexed="2.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="2.00" TP_unrolled_indexed="2.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" latency="5" ports="1*p23+2*p4" ports_indexed="1*p23+2*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p4" ports_indexed="1*p23+2*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p4" ports_indexed="1*p23+2*p4" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="2.00" TP_loop_indexed="2.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="2.00" TP_unrolled_indexed="2.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.91" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.90" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.92" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.92" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="8" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP45" uops="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="9" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="{store} VMOVAPD" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVAPD" iform="VMOVAPD_YMMqq_YMMqq_29" isa-set="AVX" string="VMOVAPD_29 (YMM, YMM)" summary="Move Aligned Packed Double-Precision Floating-Point Values" url="uops.info/html-instr/VMOVAPD_29_YMM_YMM.html" url-ref="felixcloutier.com/x86/MOVAPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.25" TP_no_interiteration="0.24" latency="1" uops="0" version="2.1"/>
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <measurement TP_loop="0.75" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.75" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.25" TP_no_interiteration="0.24" latency="1" uops="0" version="2.1"/>
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.56" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.62" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.56" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.62" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.20" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.20" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.20" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="2">
          <latency cycles="3" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FPU" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.25" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.17" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FP0/1/2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMOVAPS" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVAPS" iform="VMOVAPS_XMMdq_MEMdq" isa-set="AVX" string="VMOVAPS (XMM, M128)" summary="Move Aligned Packed Single-Precision Floating-Point Values" url="uops.info/html-instr/VMOVAPS_XMM_M128.html" url-ref="felixcloutier.com/x86/MOVAPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f32"/>
      <architecture name="SNB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="6" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="6" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="6" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="6.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="{load} VMOVAPS" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVAPS" iform="VMOVAPS_XMMdq_XMMdq_28" isa-set="AVX" string="VMOVAPS_28 (XMM, XMM)" summary="Move Aligned Packed Single-Precision Floating-Point Values" url="uops.info/html-instr/VMOVAPS_28_XMM_XMM.html" url-ref="felixcloutier.com/x86/MOVAPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.25" TP_no_interiteration="0.24" latency="1" uops="0" version="2.1"/>
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <measurement TP_loop="0.75" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.75" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.25" TP_no_interiteration="0.24" latency="1" uops="0" version="2.1"/>
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.56" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.62" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.56" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.62" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.20" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.20" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.20" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.25" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.25" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.17" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FP0/1/2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMOVAPS" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVAPS" iform="VMOVAPS_MEMdq_XMMdq" isa-set="AVX" string="VMOVAPS (M128, XMM)" summary="Move Aligned Packed Single-Precision Floating-Point Values" url="uops.info/html-instr/VMOVAPS_M128_XMM.html" url-ref="felixcloutier.com/x86/MOVAPS.html" vex="1">
      <operand idx="1" memory-prefix="xmmword ptr" name="MEM0" type="mem" w="1" width="128" xtype="f32"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.91" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.90" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.92" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.92" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP45" uops="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="8" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="{store} VMOVAPS" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVAPS" iform="VMOVAPS_XMMdq_XMMdq_29" isa-set="AVX" string="VMOVAPS_29 (XMM, XMM)" summary="Move Aligned Packed Single-Precision Floating-Point Values" url="uops.info/html-instr/VMOVAPS_29_XMM_XMM.html" url-ref="felixcloutier.com/x86/MOVAPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.25" TP_no_interiteration="0.24" latency="1" uops="0" version="2.1"/>
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <measurement TP_loop="0.75" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.75" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.25" TP_no_interiteration="0.24" latency="1" uops="0" version="2.1"/>
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.56" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.62" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.56" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.62" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.20" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.20" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.20" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.25" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.25" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.17" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FP0/1/2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMOVAPS" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVAPS" iform="VMOVAPS_YMMqq_MEMqq" isa-set="AVX" string="VMOVAPS (YMM, M256)" summary="Move Aligned Packed Single-Precision Floating-Point Values" url="uops.info/html-instr/VMOVAPS_YMM_M256.html" url-ref="felixcloutier.com/x86/MOVAPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f32"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="0.50" latency="7" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="0.50" latency="7" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="7" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="{load} VMOVAPS" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVAPS" iform="VMOVAPS_YMMqq_YMMqq_28" isa-set="AVX" string="VMOVAPS_28 (YMM, YMM)" summary="Move Aligned Packed Single-Precision Floating-Point Values" url="uops.info/html-instr/VMOVAPS_28_YMM_YMM.html" url-ref="felixcloutier.com/x86/MOVAPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.25" TP_no_interiteration="0.24" latency="1" uops="0" version="2.1"/>
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <measurement TP_loop="0.75" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.75" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.25" TP_no_interiteration="0.24" latency="1" uops="0" version="2.1"/>
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.56" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.62" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.56" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.62" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.20" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.20" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.20" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="2">
          <latency cycles="3" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FPU" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.25" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.17" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FP0/1/2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMOVAPS" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVAPS" iform="VMOVAPS_MEMqq_YMMqq" isa-set="AVX" string="VMOVAPS (M256, YMM)" summary="Move Aligned Packed Single-Precision Floating-Point Values" url="uops.info/html-instr/VMOVAPS_M256_YMM.html" url-ref="felixcloutier.com/x86/MOVAPS.html" vex="1">
      <operand idx="1" memory-prefix="ymmword ptr" name="MEM0" type="mem" w="1" width="256" xtype="f32"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" latency="5" ports="1*p23+2*p4" ports_indexed="1*p23+2*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p4" ports_indexed="1*p23+2*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p4" ports_indexed="1*p23+2*p4" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="2.00" TP_loop_indexed="2.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="2.00" TP_unrolled_indexed="2.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" latency="5" ports="1*p23+2*p4" ports_indexed="1*p23+2*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p4" ports_indexed="1*p23+2*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p4" ports_indexed="1*p23+2*p4" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="2.00" TP_loop_indexed="2.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="2.00" TP_unrolled_indexed="2.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.91" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.90" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.92" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.92" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="8" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP45" uops="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="9" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="{store} VMOVAPS" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVAPS" iform="VMOVAPS_YMMqq_YMMqq_29" isa-set="AVX" string="VMOVAPS_29 (YMM, YMM)" summary="Move Aligned Packed Single-Precision Floating-Point Values" url="uops.info/html-instr/VMOVAPS_29_YMM_YMM.html" url-ref="felixcloutier.com/x86/MOVAPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.25" TP_no_interiteration="0.24" latency="1" uops="0" version="2.1"/>
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <measurement TP_loop="0.75" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.75" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.25" TP_no_interiteration="0.24" latency="1" uops="0" version="2.1"/>
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.56" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.62" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.56" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.62" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.20" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.20" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.20" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="2">
          <latency cycles="3" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FPU" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.25" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.17" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FP0/1/2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMOVD" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVD" iform="VMOVD_XMMdq_MEMd" isa-set="AVX" string="VMOVD (XMM, M32)" summary="Move Doubleword/Move Quadword" url="uops.info/html-instr/VMOVD_XMM_M32.html" url-ref="felixcloutier.com/x86/MOVD:MOVQ.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="dword ptr" name="MEM0" r="1" type="mem" width="32" xtype="i32"/>
      <architecture name="SNB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="6" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="6" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="6" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="6.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVD" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVD" iform="VMOVD_XMMdq_GPR32d" isa-set="AVX" string="VMOVD (XMM, R32)" summary="Move Doubleword/Move Quadword" url="uops.info/html-instr/VMOVD_XMM_R32.html" url-ref="felixcloutier.com/x86/MOVD:MOVQ.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="32" xtype="i32">EAX,ECX,EDX,EBX,ESP,EBP,ESI,EDI,R8D,R9D,R10D,R11D,R12D,R13D,R14D,R15D</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="1">
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="2" ports="ALU2" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="1">
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="2" ports="ALU2" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="1">
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="2" ports="ALU2" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMOVD" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVD" iform="VMOVD_MEMd_XMMd" isa-set="AVX" string="VMOVD (M32, XMM)" summary="Move Doubleword/Move Quadword" url="uops.info/html-instr/VMOVD_M32_XMM.html" url-ref="felixcloutier.com/x86/MOVD:MOVQ.html" vex="1">
      <operand idx="1" memory-prefix="dword ptr" name="MEM0" type="mem" w="1" width="32" xtype="i32"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="32" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.91" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.90" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.92" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.92" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP45" uops="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="8" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVD" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVD" iform="VMOVD_GPR32d_XMMd" isa-set="AVX" string="VMOVD (R32, XMM)" summary="Move Doubleword/Move Quadword" url="uops.info/html-instr/VMOVD_R32_XMM.html" url-ref="felixcloutier.com/x86/MOVD:MOVQ.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="32" xtype="i32">EAX,ECX,EDX,EBX,ESP,EBP,ESI,EDI,R8D,R9D,R10D,R11D,R12D,R13D,R14D,R15D</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="32" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="2" ports="1*p0" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="2" ports="1*p0" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="2" ports="1*p0" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p0" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p0" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p0" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p0" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="2" ports="ALU0" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="2" ports="ALU0" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP45" uops="1">
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="2" ports="ALU0" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMOVDDUP" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVDDUP" iform="VMOVDDUP_XMMdq_MEMq" isa-set="AVX" string="VMOVDDUP (XMM, M64)" summary="Replicate Double FP Values" url="uops.info/html-instr/VMOVDDUP_XMM_M64.html" url-ref="felixcloutier.com/x86/MOVDDUP.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="f64"/>
      <architecture name="SNB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="6" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="6" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="6" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.5"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVDDUP" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVDDUP" iform="VMOVDDUP_XMMdq_XMMq" isa-set="AVX" string="VMOVDDUP (XMM, XMM)" summary="Replicate Double FP Values" url="uops.info/html-instr/VMOVDDUP_XMM_XMM.html" url-ref="felixcloutier.com/x86/MOVDDUP.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP1/2" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP1/2" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP1/2" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMOVDDUP" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVDDUP" iform="VMOVDDUP_YMMqq_MEMqq" isa-set="AVX" string="VMOVDDUP (YMM, M256)" summary="Replicate Double FP Values" url="uops.info/html-instr/VMOVDDUP_YMM_M256.html" url-ref="felixcloutier.com/x86/MOVDDUP.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f64"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="0.50" latency="7" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="0.50" latency="7" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="7" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVDDUP" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVDDUP" iform="VMOVDDUP_YMMqq_YMMqq" isa-set="AVX" string="VMOVDDUP (YMM, YMM)" summary="Replicate Double FP Values" url="uops.info/html-instr/VMOVDDUP_YMM_YMM.html" url-ref="felixcloutier.com/x86/MOVDDUP.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
        <doc latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="FP1/2" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP1/2" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP1/2" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMOVDQA" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVDQA" iform="VMOVDQA_XMMdq_MEMdq" isa-set="AVX" string="VMOVDQA (XMM, M128)" summary="Move Aligned Packed Integer Values" url="uops.info/html-instr/VMOVDQA_XMM_M128.html" url-ref="felixcloutier.com/x86/MOVDQA:VMOVDQA32:VMOVDQA64.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i32"/>
      <architecture name="SNB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="6" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="6" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="6" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="6.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="{load} VMOVDQA" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVDQA" iform="VMOVDQA_XMMdq_XMMdq_6F" isa-set="AVX" string="VMOVDQA_6F (XMM, XMM)" summary="Move Aligned Packed Integer Values" url="uops.info/html-instr/VMOVDQA_6F_XMM_XMM.html" url-ref="felixcloutier.com/x86/MOVDQA:VMOVDQA32:VMOVDQA64.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p05" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p05" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p05" uops="1" version="2.3"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.25" TP_no_interiteration="0.24" latency="1" uops="0" version="2.1"/>
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.25" TP_no_interiteration="0.24" latency="1" uops="0" version="2.1"/>
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.20" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.20" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.20" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.25" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.25" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.17" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMOVDQA" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVDQA" iform="VMOVDQA_MEMdq_XMMdq" isa-set="AVX" string="VMOVDQA (M128, XMM)" summary="Move Aligned Packed Integer Values" url="uops.info/html-instr/VMOVDQA_M128_XMM.html" url-ref="felixcloutier.com/x86/MOVDQA:VMOVDQA32:VMOVDQA64.html" vex="1">
      <operand idx="1" memory-prefix="xmmword ptr" name="MEM0" type="mem" w="1" width="128" xtype="i32"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.91" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.90" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.92" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.92" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP45" uops="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="8" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="{store} VMOVDQA" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVDQA" iform="VMOVDQA_XMMdq_XMMdq_7F" isa-set="AVX" string="VMOVDQA_7F (XMM, XMM)" summary="Move Aligned Packed Integer Values" url="uops.info/html-instr/VMOVDQA_7F_XMM_XMM.html" url-ref="felixcloutier.com/x86/MOVDQA:VMOVDQA32:VMOVDQA64.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p05" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p05" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p05" uops="1" version="2.3"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.25" TP_no_interiteration="0.24" latency="1" uops="0" version="2.1"/>
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.25" TP_no_interiteration="0.24" latency="1" uops="0" version="2.1"/>
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.20" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.20" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.20" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.25" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.25" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.17" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMOVDQA" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVDQA" iform="VMOVDQA_YMMqq_MEMqq" isa-set="AVX" string="VMOVDQA (YMM, M256)" summary="Move Aligned Packed Integer Values" url="uops.info/html-instr/VMOVDQA_YMM_M256.html" url-ref="felixcloutier.com/x86/MOVDQA:VMOVDQA32:VMOVDQA64.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="i32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="i32"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="0.50" latency="7" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="0.50" latency="7" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="7" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="{load} VMOVDQA" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVDQA" iform="VMOVDQA_YMMqq_YMMqq_6F" isa-set="AVX" string="VMOVDQA_6F (YMM, YMM)" summary="Move Aligned Packed Integer Values" url="uops.info/html-instr/VMOVDQA_6F_YMM_YMM.html" url-ref="felixcloutier.com/x86/MOVDQA:VMOVDQA32:VMOVDQA64.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="i32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="i32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p05" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p05" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p05" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p05" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.25" TP_no_interiteration="0.24" latency="1" uops="0" version="2.1"/>
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <measurement TP_loop="0.38" TP_loop_same_reg="1.00" TP_ports_same_reg="0.50" TP_unrolled="0.38" TP_unrolled_same_reg="1.00" ports_same_reg="1*p05" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.25" TP_no_interiteration="0.24" latency="1" uops="0" version="2.1"/>
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.20" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.20" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.20" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="2">
          <latency cycles="3" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.25" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.17" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMOVDQA" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVDQA" iform="VMOVDQA_MEMqq_YMMqq" isa-set="AVX" string="VMOVDQA (M256, YMM)" summary="Move Aligned Packed Integer Values" url="uops.info/html-instr/VMOVDQA_M256_YMM.html" url-ref="felixcloutier.com/x86/MOVDQA:VMOVDQA32:VMOVDQA64.html" vex="1">
      <operand idx="1" memory-prefix="ymmword ptr" name="MEM0" type="mem" w="1" width="256" xtype="i32"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="256" xtype="i32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" latency="5" ports="1*p23+2*p4" ports_indexed="1*p23+2*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p4" ports_indexed="1*p23+2*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p4" ports_indexed="1*p23+2*p4" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="2.00" TP_loop_indexed="2.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="2.00" TP_unrolled_indexed="2.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" latency="5" ports="1*p23+2*p4" ports_indexed="1*p23+2*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p4" ports_indexed="1*p23+2*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p4" ports_indexed="1*p23+2*p4" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="2.00" TP_loop_indexed="2.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="2.00" TP_unrolled_indexed="2.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.91" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.90" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.92" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.92" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="8" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP45" uops="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="9" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="{store} VMOVDQA" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVDQA" iform="VMOVDQA_YMMqq_YMMqq_7F" isa-set="AVX" string="VMOVDQA_7F (YMM, YMM)" summary="Move Aligned Packed Integer Values" url="uops.info/html-instr/VMOVDQA_7F_YMM_YMM.html" url-ref="felixcloutier.com/x86/MOVDQA:VMOVDQA32:VMOVDQA64.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="i32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="i32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p05" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p05" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p05" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p05" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.25" TP_no_interiteration="0.24" latency="1" uops="0" version="2.1"/>
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <measurement TP_loop="0.38" TP_loop_same_reg="1.00" TP_ports_same_reg="0.50" TP_unrolled="0.38" TP_unrolled_same_reg="1.00" ports_same_reg="1*p05" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.25" TP_no_interiteration="0.24" latency="1" uops="0" version="2.1"/>
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.20" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.20" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.20" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="2">
          <latency cycles="3" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.25" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.17" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMOVDQU" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVDQU" iform="VMOVDQU_XMMdq_MEMdq" isa-set="AVX" string="VMOVDQU (XMM, M128)" summary="Move Unaligned Packed Integer Values" url="uops.info/html-instr/VMOVDQU_XMM_M128.html" url-ref="felixcloutier.com/x86/MOVDQU:VMOVDQU8:VMOVDQU16:VMOVDQU32:VMOVDQU64.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i32"/>
      <architecture name="SNB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="6" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="6" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="6" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="6.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="{load} VMOVDQU" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVDQU" iform="VMOVDQU_XMMdq_XMMdq_6F" isa-set="AVX" string="VMOVDQU_6F (XMM, XMM)" summary="Move Unaligned Packed Integer Values" url="uops.info/html-instr/VMOVDQU_6F_XMM_XMM.html" url-ref="felixcloutier.com/x86/MOVDQU:VMOVDQU8:VMOVDQU16:VMOVDQU32:VMOVDQU64.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p05" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p05" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p05" uops="1" version="2.3"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.25" TP_no_interiteration="0.24" latency="1" uops="0" version="2.1"/>
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.25" TP_no_interiteration="0.24" latency="1" uops="0" version="2.1"/>
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.20" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.20" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.20" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.25" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.25" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.17" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FP0/1/2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMOVDQU" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVDQU" iform="VMOVDQU_YMMqq_MEMqq" isa-set="AVX" string="VMOVDQU (YMM, M256)" summary="Move Unaligned Packed Integer Values" url="uops.info/html-instr/VMOVDQU_YMM_M256.html" url-ref="felixcloutier.com/x86/MOVDQU:VMOVDQU8:VMOVDQU16:VMOVDQU32:VMOVDQU64.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="i32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="i32"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="0.50" latency="7" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="0.50" latency="7" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="7" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="{load} VMOVDQU" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVDQU" iform="VMOVDQU_YMMqq_YMMqq_6F" isa-set="AVX" string="VMOVDQU_6F (YMM, YMM)" summary="Move Unaligned Packed Integer Values" url="uops.info/html-instr/VMOVDQU_6F_YMM_YMM.html" url-ref="felixcloutier.com/x86/MOVDQU:VMOVDQU8:VMOVDQU16:VMOVDQU32:VMOVDQU64.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="i32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="i32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p05" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p05" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p05" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p05" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.25" TP_no_interiteration="0.24" latency="1" uops="0" version="2.1"/>
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <measurement TP_loop="0.38" TP_loop_same_reg="1.00" TP_ports_same_reg="0.50" TP_unrolled="0.38" TP_unrolled_same_reg="1.00" ports_same_reg="1*p05" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.25" TP_no_interiteration="0.24" latency="1" uops="0" version="2.1"/>
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.20" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.20" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.20" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="2">
          <latency cycles="3" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FPU" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.25" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.17" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FP0/1/2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMOVDQU" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVDQU" iform="VMOVDQU_MEMdq_XMMdq" isa-set="AVX" string="VMOVDQU (M128, XMM)" summary="Move Unaligned Packed Integer Values" url="uops.info/html-instr/VMOVDQU_M128_XMM.html" url-ref="felixcloutier.com/x86/MOVDQU:VMOVDQU8:VMOVDQU16:VMOVDQU32:VMOVDQU64.html" vex="1">
      <operand idx="1" memory-prefix="xmmword ptr" name="MEM0" type="mem" w="1" width="128" xtype="i32"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.91" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.90" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.92" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.92" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP45" uops="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="8" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="{store} VMOVDQU" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVDQU" iform="VMOVDQU_XMMdq_XMMdq_7F" isa-set="AVX" string="VMOVDQU_7F (XMM, XMM)" summary="Move Unaligned Packed Integer Values" url="uops.info/html-instr/VMOVDQU_7F_XMM_XMM.html" url-ref="felixcloutier.com/x86/MOVDQU:VMOVDQU8:VMOVDQU16:VMOVDQU32:VMOVDQU64.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p05" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p05" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p05" uops="1" version="2.3"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.25" TP_no_interiteration="0.24" latency="1" uops="0" version="2.1"/>
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.25" TP_no_interiteration="0.24" latency="1" uops="0" version="2.1"/>
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.20" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.20" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.20" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.25" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.25" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.17" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FP0/1/2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMOVDQU" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVDQU" iform="VMOVDQU_MEMqq_YMMqq" isa-set="AVX" string="VMOVDQU (M256, YMM)" summary="Move Unaligned Packed Integer Values" url="uops.info/html-instr/VMOVDQU_M256_YMM.html" url-ref="felixcloutier.com/x86/MOVDQU:VMOVDQU8:VMOVDQU16:VMOVDQU32:VMOVDQU64.html" vex="1">
      <operand idx="1" memory-prefix="ymmword ptr" name="MEM0" type="mem" w="1" width="256" xtype="i32"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="256" xtype="i32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" latency="5" ports="1*p23+2*p4" ports_indexed="1*p23+2*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p4" ports_indexed="1*p23+2*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p4" ports_indexed="1*p23+2*p4" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="2.00" TP_loop_indexed="2.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="2.00" TP_unrolled_indexed="2.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" latency="5" ports="1*p23+2*p4" ports_indexed="1*p23+2*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p4" ports_indexed="1*p23+2*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p4" ports_indexed="1*p23+2*p4" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="2.00" TP_loop_indexed="2.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="2.00" TP_unrolled_indexed="2.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.91" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.90" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.92" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.92" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="8" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP45" uops="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="9" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="{store} VMOVDQU" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVDQU" iform="VMOVDQU_YMMqq_YMMqq_7F" isa-set="AVX" string="VMOVDQU_7F (YMM, YMM)" summary="Move Unaligned Packed Integer Values" url="uops.info/html-instr/VMOVDQU_7F_YMM_YMM.html" url-ref="felixcloutier.com/x86/MOVDQU:VMOVDQU8:VMOVDQU16:VMOVDQU32:VMOVDQU64.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="i32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="i32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p05" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p05" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p05" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p05" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.25" TP_no_interiteration="0.24" latency="1" uops="0" version="2.1"/>
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <measurement TP_loop="0.38" TP_loop_same_reg="1.00" TP_ports_same_reg="0.50" TP_unrolled="0.38" TP_unrolled_same_reg="1.00" ports_same_reg="1*p05" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.25" TP_no_interiteration="0.24" latency="1" uops="0" version="2.1"/>
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.20" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.20" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.20" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="2">
          <latency cycles="3" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FPU" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.25" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.17" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FP0/1/2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMOVHLPS" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVHLPS" iform="VMOVHLPS_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VMOVHLPS (XMM, XMM, XMM)" summary="Move Packed Single-Precision Floating-Point Values High to Low" url="uops.info/html-instr/VMOVHLPS_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/MOVHLPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.0" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP1/2" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP1/2" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP1/2" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMOVHPD" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVHPD" iform="VMOVHPD_XMMdq_XMMq_MEMq" isa-set="AVX" string="VMOVHPD (XMM, XMM, M64)" summary="Move High Packed Double-Precision Floating-Point Value" url="uops.info/html-instr/VMOVHPD_XMM_XMM_M64.html" url-ref="felixcloutier.com/x86/MOVHPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="f64"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="7" ports="1*p23+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="7" ports="1*p23+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="7" ports="1*p23+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP1/2" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP1/2" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP1/2" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMOVHPD" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVHPD" iform="VMOVHPD_MEMq_XMMdq" isa-set="AVX" string="VMOVHPD (M64, XMM)" summary="Move High Packed Double-Precision Floating-Point Value" url="uops.info/html-instr/VMOVHPD_M64_XMM.html" url-ref="felixcloutier.com/x86/MOVHPD.html" vex="1">
      <operand idx="1" memory-prefix="qword ptr" name="MEM0" type="mem" w="1" width="64" xtype="f64"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.91" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.90" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.92" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.92" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="8" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="2" ports="FP1/2, FP2" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="8" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="2" ports="FP1/2, FP2" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP12" uops="2">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="9" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="2" ports="FP1/2, FP4" uops="2"/>
      </architecture>
    </instruction>
    <instruction asm="VMOVHPS" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVHPS" iform="VMOVHPS_XMMdq_XMMq_MEMq" isa-set="AVX" string="VMOVHPS (XMM, XMM, M64)" summary="Move High Packed Single-Precision Floating-Point Values" url="uops.info/html-instr/VMOVHPS_XMM_XMM_M64.html" url-ref="felixcloutier.com/x86/MOVHPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="f32"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="7" ports="1*p23+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="7" ports="1*p23+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="7" ports="1*p23+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP1/2" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP1/2" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP1/2" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMOVHPS" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVHPS" iform="VMOVHPS_MEMq_XMMdq" isa-set="AVX" string="VMOVHPS (M64, XMM)" summary="Move High Packed Single-Precision Floating-Point Values" url="uops.info/html-instr/VMOVHPS_M64_XMM.html" url-ref="felixcloutier.com/x86/MOVHPS.html" vex="1">
      <operand idx="1" memory-prefix="qword ptr" name="MEM0" type="mem" w="1" width="64" xtype="f32"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.91" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.90" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.92" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.92" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="8" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="2" ports="FP1/2, FP2" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="8" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="2" ports="FP1/2, FP2" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP12" uops="2">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="9" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="2" ports="FP1/2, FP4" uops="2"/>
      </architecture>
    </instruction>
    <instruction asm="VMOVLHPS" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVLHPS" iform="VMOVLHPS_XMMdq_XMMq_XMMq" isa-set="AVX" string="VMOVLHPS (XMM, XMM, XMM)" summary="Move Packed Single-Precision Floating-Point Values Low to High" url="uops.info/html-instr/VMOVLHPS_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/MOVLHPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="64" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.0" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="2" ports="FP1/2, FP2" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="2" ports="FP1/2, FP2" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="2" ports="FP1/2, FP2" uops="2"/>
      </architecture>
    </instruction>
    <instruction asm="VMOVLPD" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVLPD" iform="VMOVLPD_XMMdq_XMMdq_MEMq" isa-set="AVX" string="VMOVLPD (XMM, XMM, M64)" summary="Move Low Packed Double-Precision Floating-Point Value" url="uops.info/html-instr/VMOVLPD_XMM_XMM_M64.html" url-ref="felixcloutier.com/x86/MOVLPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="f64"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="7" ports="1*p23+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="7" ports="1*p23+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="7" ports="1*p23+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1/2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMOVLPD" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVLPD" iform="VMOVLPD_MEMq_XMMq" isa-set="AVX" string="VMOVLPD (M64, XMM)" summary="Move Low Packed Double-Precision Floating-Point Value" url="uops.info/html-instr/VMOVLPD_M64_XMM.html" url-ref="felixcloutier.com/x86/MOVLPD.html" vex="1">
      <operand idx="1" memory-prefix="qword ptr" name="MEM0" type="mem" w="1" width="64" xtype="f64"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="64" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.91" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.90" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.92" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.92" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="FP2" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="FP2" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP45" uops="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="8" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="FP4" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMOVLPS" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVLPS" iform="VMOVLPS_XMMdq_XMMdq_MEMq" isa-set="AVX" string="VMOVLPS (XMM, XMM, M64)" summary="Move Low Packed Single-Precision Floating-Point Values" url="uops.info/html-instr/VMOVLPS_XMM_XMM_M64.html" url-ref="felixcloutier.com/x86/MOVLPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="f32"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="7" ports="1*p23+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="7" ports="1*p23+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="7" ports="1*p23+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1/2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMOVLPS" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVLPS" iform="VMOVLPS_MEMq_XMMq" isa-set="AVX" string="VMOVLPS (M64, XMM)" summary="Move Low Packed Single-Precision Floating-Point Values" url="uops.info/html-instr/VMOVLPS_M64_XMM.html" url-ref="felixcloutier.com/x86/MOVLPS.html" vex="1">
      <operand idx="1" memory-prefix="qword ptr" name="MEM0" type="mem" w="1" width="64" xtype="f32"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="64" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.91" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.90" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.92" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.92" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="FP2" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="FP2" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP45" uops="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="8" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="FP4" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMOVMSKPD" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVMSKPD" iform="VMOVMSKPD_GPR32d_XMMdq" isa-set="AVX" string="VMOVMSKPD (R32, XMM)" summary="Extract Packed Double-Precision Floating-Point Sign Mask" url="uops.info/html-instr/VMOVMSKPD_R32_XMM.html" url-ref="felixcloutier.com/x86/MOVMSKPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="32" xtype="i32">EAX,ECX,EDX,EBX,ESP,EBP,ESI,EDI,R8D,R9D,R10D,R11D,R12D,R13D,R14D,R15D</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="2" ports="1*p0" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="2" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="2" ports="1*p0" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="2" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p0" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p0" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p0" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p0" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p0" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="FP2" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="FP2" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP45" uops="1">
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="FP2" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMOVMSKPD" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVMSKPD" iform="VMOVMSKPD_GPR32d_YMMqq" isa-set="AVX" string="VMOVMSKPD (R32, YMM)" summary="Extract Packed Double-Precision Floating-Point Sign Mask" url="uops.info/html-instr/VMOVMSKPD_R32_YMM.html" url-ref="felixcloutier.com/x86/MOVMSKPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="32" xtype="i32">EAX,ECX,EDX,EBX,ESP,EBP,ESI,EDI,R8D,R9D,R10D,R11D,R12D,R13D,R14D,R15D</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="2" ports="1*p0" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="2" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="2" ports="1*p0" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="2" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p0" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p0" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p0" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p0" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p0" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="FP2" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="FP2" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP45" uops="1">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="FP2" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMOVMSKPS" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVMSKPS" iform="VMOVMSKPS_GPR32d_XMMdq" isa-set="AVX" string="VMOVMSKPS (R32, XMM)" summary="Extract Packed Single-Precision Floating-Point Sign Mask" url="uops.info/html-instr/VMOVMSKPS_R32_XMM.html" url-ref="felixcloutier.com/x86/MOVMSKPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="32" xtype="i32">EAX,ECX,EDX,EBX,ESP,EBP,ESI,EDI,R8D,R9D,R10D,R11D,R12D,R13D,R14D,R15D</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="2" ports="1*p0" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="2" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="2" ports="1*p0" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="2" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p0" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p0" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p0" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p0" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p0" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="FP2" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="FP2" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP45" uops="1">
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="FP2" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMOVMSKPS" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVMSKPS" iform="VMOVMSKPS_GPR32d_YMMqq" isa-set="AVX" string="VMOVMSKPS (R32, YMM)" summary="Extract Packed Single-Precision Floating-Point Sign Mask" url="uops.info/html-instr/VMOVMSKPS_R32_YMM.html" url-ref="felixcloutier.com/x86/MOVMSKPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="32" xtype="i32">EAX,ECX,EDX,EBX,ESP,EBP,ESI,EDI,R8D,R9D,R10D,R11D,R12D,R13D,R14D,R15D</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="2" ports="1*p0" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="2" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="2" ports="1*p0" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="2" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p0" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p0" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p0" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p0" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p0" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="FP2" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="FP2" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP45" uops="1">
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="FP2" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMOVNTDQ" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVNTDQ" iform="VMOVNTDQ_MEMdq_XMMdq" isa-set="AVX" string="VMOVNTDQ (M128, XMM)" summary="Store Packed Integers Using Non-Temporal Hint" url="uops.info/html-instr/VMOVNTDQ_M128_XMM.html" url-ref="felixcloutier.com/x86/MOVNTDQ.html" vex="1">
      <operand idx="1" memory-prefix="xmmword ptr" name="MEM0" type="mem" w="1" width="128" xtype="i32"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="356" cycles_addr_index="355" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="359" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="325" cycles_addr_index="324" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="325" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.91" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="385" cycles_addr_index="385" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="387" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.90" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="344" cycles_addr_index="344" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="288" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.92" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="357" cycles_addr_index="357" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="359" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.92" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="891" cycles_addr_index="892" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="910" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="471" cycles_addr_index="471" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="470" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="431" cycles_addr_index="431" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="432" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="348" cycles_addr_index="352" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="332" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="871" cycles_addr_index="870" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="878" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.02" TP_ports="0.50" TP_unrolled="0.97" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="390" cycles_addr_index="390" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="382" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="3.84" TP_ports="0.50" TP_unrolled="3.70" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="496" cycles_addr_index="496" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="472" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.14" TP_ports="0.50" TP_unrolled="1.05" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="464" cycles_addr_index="465" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="448" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles_addr="1022" cycles_addr_index="1024" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="1251" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="FP2" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles_addr="1197" cycles_addr_index="1202" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="1193" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="FP2" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP45" uops="1">
          <latency cycles_addr="771" cycles_addr_index="771" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="776" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="FP4" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMOVNTDQ" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVNTDQ" iform="VMOVNTDQ_MEMqq_YMMqq" isa-set="AVX" string="VMOVNTDQ (M256, YMM)" summary="Store Packed Integers Using Non-Temporal Hint" url="uops.info/html-instr/VMOVNTDQ_M256_YMM.html" url-ref="felixcloutier.com/x86/MOVNTDQ.html" vex="1">
      <operand idx="1" memory-prefix="ymmword ptr" name="MEM0" type="mem" w="1" width="256" xtype="i32"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="256" xtype="i32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" latency="5" ports="1*p23+2*p4" ports_indexed="1*p23+2*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p4" ports_indexed="1*p23+2*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p4" ports_indexed="1*p23+2*p4" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="2.00" TP_loop_indexed="2.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="2.00" TP_unrolled_indexed="2.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="357" cycles_addr_index="356" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="360" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" latency="5" ports="1*p23+2*p4" ports_indexed="1*p23+2*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p4" ports_indexed="1*p23+2*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p4" ports_indexed="1*p23+2*p4" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="2.00" TP_loop_indexed="2.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="2.00" TP_unrolled_indexed="2.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="323" cycles_addr_index="322" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="326" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.91" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="385" cycles_addr_index="385" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="388" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.90" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="344" cycles_addr_index="344" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="345" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.92" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="357" cycles_addr_index="357" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="359" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.92" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="892" cycles_addr_index="892" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="914" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="468" cycles_addr_index="471" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="475" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="431" cycles_addr_index="431" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="432" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="347" cycles_addr_index="351" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="343" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="867" cycles_addr_index="865" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="875" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.02" TP_ports="0.50" TP_unrolled="1.05" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="390" cycles_addr_index="390" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="382" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="3.62" TP_ports="0.50" TP_unrolled="3.54" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="500" cycles_addr_index="497" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="498" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.14" TP_ports="0.50" TP_unrolled="1.07" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="465" cycles_addr_index="464" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="452" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="2">
          <latency cycles_addr="1024" cycles_addr_index="1024" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="1251" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="2.00" latency="1" ports="FP2" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles_addr="1200" cycles_addr_index="1199" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="1191" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="FP2" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP45" uops="1">
          <latency cycles_addr="771" cycles_addr_index="770" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="771" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="FP4" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMOVNTDQA" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVNTDQA" iform="VMOVNTDQA_XMMdq_MEMdq" isa-set="AVX" string="VMOVNTDQA (XMM, M128)" summary="Load Double Quadword Non-Temporal Aligned Hint" url="uops.info/html-instr/VMOVNTDQA_XMM_M128.html" url-ref="felixcloutier.com/x86/MOVNTDQA.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i32"/>
      <architecture name="SNB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="6" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="6" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="6" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.5"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p015+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMOVNTPD" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVNTPD" iform="VMOVNTPD_MEMdq_XMMdq" isa-set="AVX" string="VMOVNTPD (M128, XMM)" summary="Store Packed Double-Precision Floating-Point Values Using Non-Temporal Hint" url="uops.info/html-instr/VMOVNTPD_M128_XMM.html" url-ref="felixcloutier.com/x86/MOVNTPD.html" vex="1">
      <operand idx="1" memory-prefix="xmmword ptr" name="MEM0" type="mem" w="1" width="128" xtype="f64"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="356" cycles_addr_index="356" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="360" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="324" cycles_addr_index="319" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="321" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.91" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="385" cycles_addr_index="385" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="385" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.90" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="344" cycles_addr_index="344" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="288" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.92" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="357" cycles_addr_index="357" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="358" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.92" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="893" cycles_addr_index="892" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="912" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="471" cycles_addr_index="468" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="468" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.36" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="431" cycles_addr_index="431" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="432" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="347" cycles_addr_index="346" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="350" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.02" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="873" cycles_addr_index="869" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="865" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.03" TP_ports="0.50" TP_unrolled="1.04" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="390" cycles_addr_index="390" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="380" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="3.63" TP_ports="0.50" TP_unrolled="3.80" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="500" cycles_addr_index="498" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="491" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.15" TP_ports="0.50" TP_unrolled="1.13" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="466" cycles_addr_index="466" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="431" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles_addr="1024" cycles_addr_index="1022" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="1253" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="FP2" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles_addr="1204" cycles_addr_index="1201" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="1192" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="FP2" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP45" uops="1">
          <latency cycles_addr="772" cycles_addr_index="773" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="770" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="FP4" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMOVNTPD" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVNTPD" iform="VMOVNTPD_MEMqq_YMMqq" isa-set="AVX" string="VMOVNTPD (M256, YMM)" summary="Store Packed Double-Precision Floating-Point Values Using Non-Temporal Hint" url="uops.info/html-instr/VMOVNTPD_M256_YMM.html" url-ref="felixcloutier.com/x86/MOVNTPD.html" vex="1">
      <operand idx="1" memory-prefix="ymmword ptr" name="MEM0" type="mem" w="1" width="256" xtype="f64"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" latency="5" ports="1*p23+2*p4" ports_indexed="1*p23+2*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p4" ports_indexed="1*p23+2*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p4" ports_indexed="1*p23+2*p4" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="2.00" TP_loop_indexed="2.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="2.00" TP_unrolled_indexed="2.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="357" cycles_addr_index="356" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="360" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" latency="5" ports="1*p23+2*p4" ports_indexed="1*p23+2*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p4" ports_indexed="1*p23+2*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p4" ports_indexed="1*p23+2*p4" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="2.00" TP_loop_indexed="2.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="2.00" TP_unrolled_indexed="2.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="315" cycles_addr_index="319" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="322" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.91" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="385" cycles_addr_index="385" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="392" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.90" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="344" cycles_addr_index="344" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="345" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.92" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="357" cycles_addr_index="357" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="358" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.92" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="0.98" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="892" cycles_addr_index="892" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="915" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.20" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="468" cycles_addr_index="471" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="479" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.20" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="431" cycles_addr_index="431" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="430" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="348" cycles_addr_index="344" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="355" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.02" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="872" cycles_addr_index="869" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="880" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.04" TP_ports="0.50" TP_unrolled="1.06" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="390" cycles_addr_index="390" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="381" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="3.82" TP_ports="0.50" TP_unrolled="3.92" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="500" cycles_addr_index="505" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="495" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.16" TP_ports="0.50" TP_unrolled="1.13" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="466" cycles_addr_index="466" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="452" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="2">
          <latency cycles_addr="1024" cycles_addr_index="1024" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="1249" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="2.00" latency="1" ports="FP2" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles_addr="1199" cycles_addr_index="1197" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="1190" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="FP2" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP45" uops="1">
          <latency cycles_addr="772" cycles_addr_index="772" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="773" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="FP4" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMOVNTPS" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVNTPS" iform="VMOVNTPS_MEMdq_XMMdq" isa-set="AVX" string="VMOVNTPS (M128, XMM)" summary="Store Packed Single-Precision Floating-Point Values Using Non-Temporal Hint" url="uops.info/html-instr/VMOVNTPS_M128_XMM.html" url-ref="felixcloutier.com/x86/MOVNTPS.html" vex="1">
      <operand idx="1" memory-prefix="xmmword ptr" name="MEM0" type="mem" w="1" width="128" xtype="f32"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="356" cycles_addr_index="356" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="360" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="325" cycles_addr_index="325" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="316" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.91" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="385" cycles_addr_index="385" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="392" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.90" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="344" cycles_addr_index="344" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="288" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.92" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="357" cycles_addr_index="357" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="358" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.92" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="0.98" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="893" cycles_addr_index="907" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="913" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.33" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="469" cycles_addr_index="471" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="469" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="431" cycles_addr_index="431" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="432" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="366" cycles_addr_index="347" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="374" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="871" cycles_addr_index="866" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="874" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.03" TP_ports="0.50" TP_unrolled="1.00" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="390" cycles_addr_index="390" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="382" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="3.63" TP_ports="0.50" TP_unrolled="3.95" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="498" cycles_addr_index="506" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="483" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.15" TP_ports="0.50" TP_unrolled="1.16" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="466" cycles_addr_index="466" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="446" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles_addr="1021" cycles_addr_index="1023" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="1249" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="FP2" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles_addr="1204" cycles_addr_index="1202" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="1192" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="FP2" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP45" uops="1">
          <latency cycles_addr="778" cycles_addr_index="773" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="771" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="FP4" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMOVNTPS" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVNTPS" iform="VMOVNTPS_MEMqq_YMMqq" isa-set="AVX" string="VMOVNTPS (M256, YMM)" summary="Store Packed Single-Precision Floating-Point Values Using Non-Temporal Hint" url="uops.info/html-instr/VMOVNTPS_M256_YMM.html" url-ref="felixcloutier.com/x86/MOVNTPS.html" vex="1">
      <operand idx="1" memory-prefix="ymmword ptr" name="MEM0" type="mem" w="1" width="256" xtype="f32"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" latency="5" ports="1*p23+2*p4" ports_indexed="1*p23+2*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p4" ports_indexed="1*p23+2*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p4" ports_indexed="1*p23+2*p4" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="2.00" TP_loop_indexed="2.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="2.00" TP_unrolled_indexed="2.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="356" cycles_addr_index="355" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="360" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" latency="5" ports="1*p23+2*p4" ports_indexed="1*p23+2*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p4" ports_indexed="1*p23+2*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p4" ports_indexed="1*p23+2*p4" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="2.00" TP_loop_indexed="2.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="2.00" TP_unrolled_indexed="2.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="285" cycles_addr_index="311" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="329" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.91" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="385" cycles_addr_index="385" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="392" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.90" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="344" cycles_addr_index="344" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="341" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.92" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="357" cycles_addr_index="357" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="358" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.92" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="0.98" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="893" cycles_addr_index="893" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="913" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="468" cycles_addr_index="468" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="469" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.14" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="431" cycles_addr_index="431" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="432" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="347" cycles_addr_index="350" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="356" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="869" cycles_addr_index="870" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="878" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.03" TP_ports="0.50" TP_unrolled="1.03" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="390" cycles_addr_index="390" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="383" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="3.84" TP_ports="0.50" TP_unrolled="3.67" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="500" cycles_addr_index="506" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="488" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.16" TP_ports="0.50" TP_unrolled="1.17" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="466" cycles_addr_index="466" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="442" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="2">
          <latency cycles_addr="1024" cycles_addr_index="1024" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="1250" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="2.00" latency="1" ports="FP2" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles_addr="1200" cycles_addr_index="1200" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="1195" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="FP2" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP45" uops="1">
          <latency cycles_addr="777" cycles_addr_index="773" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="773" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="FP4" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMOVQ" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVQ" iform="VMOVQ_XMMdq_GPR64q" isa-set="AVX" string="VMOVQ (XMM, R64)" summary="Move Doubleword/Move Quadword" url="uops.info/html-instr/VMOVQ_XMM_R64.html" url-ref="felixcloutier.com/x86/MOVD:MOVQ.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="i64">RAX,RCX,RDX,RBX,RSP,RBP,RSI,RDI,R8,R9,R10,R11,R12,R13,R14,R15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="1">
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="1">
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="1">
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVQ" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVQ" iform="VMOVQ_GPR64q_XMMq" isa-set="AVX" string="VMOVQ (R64, XMM)" summary="Move Doubleword/Move Quadword" url="uops.info/html-instr/VMOVQ_R64_XMM.html" url-ref="felixcloutier.com/x86/MOVD:MOVQ.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="64" xtype="i64">RAX,RCX,RDX,RBX,RSP,RBP,RSI,RDI,R8,R9,R10,R11,R12,R13,R14,R15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="i64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="2" ports="1*p0" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="2" ports="1*p0" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="2" ports="1*p0" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p0" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p0" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p0" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p0" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP45" uops="1">
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVQ" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVQ" iform="VMOVQ_XMMdq_MEMq_7E" isa-set="AVX" string="VMOVQ (XMM, M64)" summary="Move Quadword" url="uops.info/html-instr/VMOVQ_XMM_M64.html" url-ref="felixcloutier.com/x86/MOVQ.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="i64"/>
      <architecture name="SNB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="6" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="6" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="6" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="6.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="{load} VMOVQ" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVQ" iform="VMOVQ_XMMdq_XMMq_7E" isa-set="AVX" string="VMOVQ_7E (XMM, XMM)" summary="Move Quadword" url="uops.info/html-instr/VMOVQ_7E_XMM_XMM.html" url-ref="felixcloutier.com/x86/MOVQ.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="i64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="0.33" TP_no_interiteration="0.35" TP_ports="0.33" latency="1" ports="1*p015" uops="1" version="2.1"/>
        <IACA TP="0.33" TP_no_interiteration="0.35" TP_ports="0.33" ports="1*p015" uops="1" version="2.2"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.33" TP_no_interiteration="0.35" TP_ports="0.33" latency="1" ports="1*p015" uops="1" version="2.1"/>
        <IACA TP="0.33" TP_no_interiteration="0.35" TP_ports="0.33" ports="1*p015" uops="1" version="2.2"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.33" TP_no_interiteration="0.35" TP_ports="0.33" latency="1" ports="1*p015" uops="1" version="2.1"/>
        <IACA TP="0.33" TP_no_interiteration="0.35" TP_ports="0.33" ports="1*p015" uops="1" version="2.2"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="3.0"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.33" TP_no_interiteration="0.35" TP_ports="0.33" ports="1*p015" uops="1" version="2.2"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="3.0"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="3.0"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="3.0"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.33" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.25" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.25" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.25" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FP0/1/2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMOVQ" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVQ" iform="VMOVQ_MEMq_XMMq_D6" isa-set="AVX" string="VMOVQ (M64, XMM)" summary="Move Quadword" url="uops.info/html-instr/VMOVQ_M64_XMM.html" url-ref="felixcloutier.com/x86/MOVQ.html" vex="1">
      <operand idx="1" memory-prefix="qword ptr" name="MEM0" type="mem" w="1" width="64" xtype="i64"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="64" xtype="i64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.91" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.90" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.92" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.92" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP45" uops="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="8" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="{store} VMOVQ" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVQ" iform="VMOVQ_XMMdq_XMMq_D6" isa-set="AVX" string="VMOVQ_D6 (XMM, XMM)" summary="Move Quadword" url="uops.info/html-instr/VMOVQ_D6_XMM_XMM.html" url-ref="felixcloutier.com/x86/MOVQ.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="64" xtype="i64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="0.33" TP_no_interiteration="0.35" TP_ports="0.33" latency="1" ports="1*p015" uops="1" version="2.1"/>
        <IACA TP="0.33" TP_no_interiteration="0.35" TP_ports="0.33" ports="1*p015" uops="1" version="2.2"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.33" TP_no_interiteration="0.35" TP_ports="0.33" latency="1" ports="1*p015" uops="1" version="2.1"/>
        <IACA TP="0.33" TP_no_interiteration="0.35" TP_ports="0.33" ports="1*p015" uops="1" version="2.2"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.33" TP_no_interiteration="0.35" TP_ports="0.33" latency="1" ports="1*p015" uops="1" version="2.1"/>
        <IACA TP="0.33" TP_no_interiteration="0.35" TP_ports="0.33" ports="1*p015" uops="1" version="2.2"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="3.0"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.33" TP_no_interiteration="0.35" TP_ports="0.33" ports="1*p015" uops="1" version="2.2"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="3.0"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="3.0"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="3.0"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.33" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.25" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.25" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.25" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FP0/1/2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMOVSD" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVSD" iform="VMOVSD_XMMdq_MEMq" isa-set="AVX" string="VMOVSD (XMM, M64)" summary="Move or Merge Scalar Double-Precision Floating-Point Value" url="uops.info/html-instr/VMOVSD_XMM_M64.html" url-ref="felixcloutier.com/x86/MOVSD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="f64"/>
      <architecture name="SNB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="6" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="6" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="6" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="6.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="{load} VMOVSD" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVSD" iform="VMOVSD_XMMdq_XMMdq_XMMq_10" isa-set="AVX" string="VMOVSD_10 (XMM, XMM, XMM)" summary="Move or Merge Scalar Double-Precision Floating-Point Value" url="uops.info/html-instr/VMOVSD_10_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/MOVSD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="64" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.25" ports="1*FP0123" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FPU" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.25" ports="1*FP0123" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.25" ports="1*FP0123" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FP0/1/2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMOVSD" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVSD" iform="VMOVSD_MEMq_XMMq" isa-set="AVX" string="VMOVSD (M64, XMM)" summary="Move or Merge Scalar Double-Precision Floating-Point Value" url="uops.info/html-instr/VMOVSD_M64_XMM.html" url-ref="felixcloutier.com/x86/MOVSD.html" vex="1">
      <operand idx="1" memory-prefix="qword ptr" name="MEM0" type="mem" w="1" width="64" xtype="f64"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="64" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.91" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.90" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.92" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.92" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP45" uops="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="8" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="{store} VMOVSD" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVSD" iform="VMOVSD_XMMdq_XMMdq_XMMq_11" isa-set="AVX" string="VMOVSD_11 (XMM, XMM, XMM)" summary="Move or Merge Scalar Double-Precision Floating-Point Value" url="uops.info/html-instr/VMOVSD_11_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/MOVSD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="64" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.25" ports="1*FP0123" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FPU" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.25" ports="1*FP0123" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.25" ports="1*FP0123" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FP0/1/2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMOVSHDUP" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVSHDUP" iform="VMOVSHDUP_XMMdq_MEMdq" isa-set="AVX" string="VMOVSHDUP (XMM, M128)" summary="Replicate Single FP Values" url="uops.info/html-instr/VMOVSHDUP_XMM_M128.html" url-ref="felixcloutier.com/x86/MOVSHDUP.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f32"/>
      <architecture name="SNB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="6" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="6" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="6" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.5"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVSHDUP" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVSHDUP" iform="VMOVSHDUP_XMMdq_XMMdq" isa-set="AVX" string="VMOVSHDUP (XMM, XMM)" summary="Replicate Single FP Values" url="uops.info/html-instr/VMOVSHDUP_XMM_XMM.html" url-ref="felixcloutier.com/x86/MOVSHDUP.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
        <doc latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP1/2" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP1/2" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP1/2" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMOVSHDUP" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVSHDUP" iform="VMOVSHDUP_YMMqq_MEMqq" isa-set="AVX" string="VMOVSHDUP (YMM, M256)" summary="Replicate Single FP Values" url="uops.info/html-instr/VMOVSHDUP_YMM_M256.html" url-ref="felixcloutier.com/x86/MOVSHDUP.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f32"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="0.50" latency="7" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="0.50" latency="7" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="7" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVSHDUP" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVSHDUP" iform="VMOVSHDUP_YMMqq_YMMqq" isa-set="AVX" string="VMOVSHDUP (YMM, YMM)" summary="Replicate Single FP Values" url="uops.info/html-instr/VMOVSHDUP_YMM_YMM.html" url-ref="felixcloutier.com/x86/MOVSHDUP.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="FP1/2" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP1/2" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP1/2" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMOVSLDUP" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVSLDUP" iform="VMOVSLDUP_XMMdq_MEMdq" isa-set="AVX" string="VMOVSLDUP (XMM, M128)" summary="Replicate Single FP Values" url="uops.info/html-instr/VMOVSLDUP_XMM_M128.html" url-ref="felixcloutier.com/x86/MOVSLDUP.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f32"/>
      <architecture name="SNB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="6" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="6" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="6" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.5"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVSLDUP" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVSLDUP" iform="VMOVSLDUP_XMMdq_XMMdq" isa-set="AVX" string="VMOVSLDUP (XMM, XMM)" summary="Replicate Single FP Values" url="uops.info/html-instr/VMOVSLDUP_XMM_XMM.html" url-ref="felixcloutier.com/x86/MOVSLDUP.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
        <doc latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP1/2" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP1/2" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP1/2" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMOVSLDUP" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVSLDUP" iform="VMOVSLDUP_YMMqq_MEMqq" isa-set="AVX" string="VMOVSLDUP (YMM, M256)" summary="Replicate Single FP Values" url="uops.info/html-instr/VMOVSLDUP_YMM_M256.html" url-ref="felixcloutier.com/x86/MOVSLDUP.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f32"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="0.50" latency="7" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="0.50" latency="7" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="7" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMOVSLDUP" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVSLDUP" iform="VMOVSLDUP_YMMqq_YMMqq" isa-set="AVX" string="VMOVSLDUP (YMM, YMM)" summary="Replicate Single FP Values" url="uops.info/html-instr/VMOVSLDUP_YMM_YMM.html" url-ref="felixcloutier.com/x86/MOVSLDUP.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="FP1/2" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP1/2" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP1/2" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMOVSS" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVSS" iform="VMOVSS_XMMdq_MEMd" isa-set="AVX" string="VMOVSS (XMM, M32)" summary="Move or Merge Scalar Single-Precision Floating-Point Value" url="uops.info/html-instr/VMOVSS_XMM_M32.html" url-ref="felixcloutier.com/x86/MOVSS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="dword ptr" name="MEM0" r="1" type="mem" width="32" xtype="f32"/>
      <architecture name="SNB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="6" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="6" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="6" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="6.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="{load} VMOVSS" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVSS" iform="VMOVSS_XMMdq_XMMdq_XMMd_10" isa-set="AVX" string="VMOVSS_10 (XMM, XMM, XMM)" summary="Move or Merge Scalar Single-Precision Floating-Point Value" url="uops.info/html-instr/VMOVSS_10_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/MOVSS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="32" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.33" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.25" ports="1*FP0123" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.25" ports="1*FP0123" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.25" ports="1*FP0123" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FP0/1/2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMOVSS" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVSS" iform="VMOVSS_MEMd_XMMd" isa-set="AVX" string="VMOVSS (M32, XMM)" summary="Move or Merge Scalar Single-Precision Floating-Point Value" url="uops.info/html-instr/VMOVSS_M32_XMM.html" url-ref="felixcloutier.com/x86/MOVSS.html" vex="1">
      <operand idx="1" memory-prefix="dword ptr" name="MEM0" type="mem" w="1" width="32" xtype="f32"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="32" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.91" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.90" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.92" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.92" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP45" uops="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="8" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="{store} VMOVSS" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVSS" iform="VMOVSS_XMMdq_XMMdq_XMMd_11" isa-set="AVX" string="VMOVSS_11 (XMM, XMM, XMM)" summary="Move or Merge Scalar Single-Precision Floating-Point Value" url="uops.info/html-instr/VMOVSS_11_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/MOVSS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="32" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.33" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.25" ports="1*FP0123" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.25" ports="1*FP0123" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.25" ports="1*FP0123" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FP0/1/2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMOVUPD" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVUPD" iform="VMOVUPD_XMMdq_MEMdq" isa-set="AVX" string="VMOVUPD (XMM, M128)" summary="Move Unaligned Packed Double-Precision Floating-Point Values" url="uops.info/html-instr/VMOVUPD_XMM_M128.html" url-ref="felixcloutier.com/x86/MOVUPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f64"/>
      <architecture name="SNB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="6" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="6" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="6" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="6.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="{load} VMOVUPD" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVUPD" iform="VMOVUPD_XMMdq_XMMdq_10" isa-set="AVX" string="VMOVUPD_10 (XMM, XMM)" summary="Move Unaligned Packed Double-Precision Floating-Point Values" url="uops.info/html-instr/VMOVUPD_10_XMM_XMM.html" url-ref="felixcloutier.com/x86/MOVUPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.25" TP_no_interiteration="0.24" latency="1" uops="0" version="2.1"/>
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <measurement TP_loop="0.75" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.75" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.25" TP_no_interiteration="0.24" latency="1" uops="0" version="2.1"/>
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.56" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.62" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.56" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.62" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.20" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.20" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.20" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.25" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.25" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.17" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FP0/1/2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMOVUPD" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVUPD" iform="VMOVUPD_MEMdq_XMMdq" isa-set="AVX" string="VMOVUPD (M128, XMM)" summary="Move Unaligned Packed Double-Precision Floating-Point Values" url="uops.info/html-instr/VMOVUPD_M128_XMM.html" url-ref="felixcloutier.com/x86/MOVUPD.html" vex="1">
      <operand idx="1" memory-prefix="xmmword ptr" name="MEM0" type="mem" w="1" width="128" xtype="f64"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.91" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.90" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.92" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.92" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP45" uops="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="8" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="{store} VMOVUPD" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVUPD" iform="VMOVUPD_XMMdq_XMMdq_11" isa-set="AVX" string="VMOVUPD_11 (XMM, XMM)" summary="Move Unaligned Packed Double-Precision Floating-Point Values" url="uops.info/html-instr/VMOVUPD_11_XMM_XMM.html" url-ref="felixcloutier.com/x86/MOVUPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.25" TP_no_interiteration="0.24" latency="1" uops="0" version="2.1"/>
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <measurement TP_loop="0.75" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.75" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.25" TP_no_interiteration="0.24" latency="1" uops="0" version="2.1"/>
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.56" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.62" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.56" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.62" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.20" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.20" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.20" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.25" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.25" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.17" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FP0/1/2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMOVUPD" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVUPD" iform="VMOVUPD_YMMqq_MEMqq" isa-set="AVX" string="VMOVUPD (YMM, M256)" summary="Move Unaligned Packed Double-Precision Floating-Point Values" url="uops.info/html-instr/VMOVUPD_YMM_M256.html" url-ref="felixcloutier.com/x86/MOVUPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f64"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="0.50" latency="7" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="0.50" latency="7" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="7" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="{load} VMOVUPD" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVUPD" iform="VMOVUPD_YMMqq_YMMqq_10" isa-set="AVX" string="VMOVUPD_10 (YMM, YMM)" summary="Move Unaligned Packed Double-Precision Floating-Point Values" url="uops.info/html-instr/VMOVUPD_10_YMM_YMM.html" url-ref="felixcloutier.com/x86/MOVUPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.25" TP_no_interiteration="0.24" latency="1" uops="0" version="2.1"/>
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <measurement TP_loop="0.75" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.75" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.25" TP_no_interiteration="0.24" latency="1" uops="0" version="2.1"/>
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.56" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.62" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.56" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.62" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.20" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.20" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.20" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="2">
          <latency cycles="3" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="FPU" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.25" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.17" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FP0/1/2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMOVUPD" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVUPD" iform="VMOVUPD_MEMqq_YMMqq" isa-set="AVX" string="VMOVUPD (M256, YMM)" summary="Move Unaligned Packed Double-Precision Floating-Point Values" url="uops.info/html-instr/VMOVUPD_M256_YMM.html" url-ref="felixcloutier.com/x86/MOVUPD.html" vex="1">
      <operand idx="1" memory-prefix="ymmword ptr" name="MEM0" type="mem" w="1" width="256" xtype="f64"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" latency="5" ports="1*p23+2*p4" ports_indexed="1*p23+2*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p4" ports_indexed="1*p23+2*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p4" ports_indexed="1*p23+2*p4" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="2.00" TP_loop_indexed="2.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="2.00" TP_unrolled_indexed="2.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" latency="5" ports="1*p23+2*p4" ports_indexed="1*p23+2*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p4" ports_indexed="1*p23+2*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p4" ports_indexed="1*p23+2*p4" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="2.00" TP_loop_indexed="2.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="2.00" TP_unrolled_indexed="2.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.91" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.90" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.92" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.92" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="8" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP45" uops="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="9" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="{store} VMOVUPD" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVUPD" iform="VMOVUPD_YMMqq_YMMqq_11" isa-set="AVX" string="VMOVUPD_11 (YMM, YMM)" summary="Move Unaligned Packed Double-Precision Floating-Point Values" url="uops.info/html-instr/VMOVUPD_11_YMM_YMM.html" url-ref="felixcloutier.com/x86/MOVUPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.25" TP_no_interiteration="0.24" latency="1" uops="0" version="2.1"/>
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <measurement TP_loop="0.75" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.75" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.25" TP_no_interiteration="0.24" latency="1" uops="0" version="2.1"/>
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.56" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.62" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.56" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.62" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.20" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.20" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.20" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="2">
          <latency cycles="3" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="FPU" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.25" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.17" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FP0/1/2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMOVUPS" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVUPS" iform="VMOVUPS_XMMdq_MEMdq" isa-set="AVX" string="VMOVUPS (XMM, M128)" summary="Move Unaligned Packed Single-Precision Floating-Point Values" url="uops.info/html-instr/VMOVUPS_XMM_M128.html" url-ref="felixcloutier.com/x86/MOVUPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f32"/>
      <architecture name="SNB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="6" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="6" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="6" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="6" cycles_addr_index="6" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="6.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="{load} VMOVUPS" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVUPS" iform="VMOVUPS_XMMdq_XMMdq_10" isa-set="AVX" string="VMOVUPS_10 (XMM, XMM)" summary="Move Unaligned Packed Single-Precision Floating-Point Values" url="uops.info/html-instr/VMOVUPS_10_XMM_XMM.html" url-ref="felixcloutier.com/x86/MOVUPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.25" TP_no_interiteration="0.24" latency="1" uops="0" version="2.1"/>
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <measurement TP_loop="0.75" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.75" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.25" TP_no_interiteration="0.24" latency="1" uops="0" version="2.1"/>
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.56" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.62" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.56" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.62" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.20" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.20" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.20" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.25" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.25" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.17" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FP0/1/2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMOVUPS" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVUPS" iform="VMOVUPS_MEMdq_XMMdq" isa-set="AVX" string="VMOVUPS (M128, XMM)" summary="Move Unaligned Packed Single-Precision Floating-Point Values" url="uops.info/html-instr/VMOVUPS_M128_XMM.html" url-ref="felixcloutier.com/x86/MOVUPS.html" vex="1">
      <operand idx="1" memory-prefix="xmmword ptr" name="MEM0" type="mem" w="1" width="128" xtype="f32"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.91" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.90" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.92" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.92" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP45" uops="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="8" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="{store} VMOVUPS" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVUPS" iform="VMOVUPS_XMMdq_XMMdq_11" isa-set="AVX" string="VMOVUPS_11 (XMM, XMM)" summary="Move Unaligned Packed Single-Precision Floating-Point Values" url="uops.info/html-instr/VMOVUPS_11_XMM_XMM.html" url-ref="felixcloutier.com/x86/MOVUPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.25" TP_no_interiteration="0.24" latency="1" uops="0" version="2.1"/>
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <measurement TP_loop="0.75" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.75" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.25" TP_no_interiteration="0.24" latency="1" uops="0" version="2.1"/>
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.56" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.62" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.56" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.62" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.20" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.20" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.20" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="0.33" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.25" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.25" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.17" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FP0/1/2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMOVUPS" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVUPS" iform="VMOVUPS_YMMqq_MEMqq" isa-set="AVX" string="VMOVUPS (YMM, M256)" summary="Move Unaligned Packed Single-Precision Floating-Point Values" url="uops.info/html-instr/VMOVUPS_YMM_M256.html" url-ref="felixcloutier.com/x86/MOVUPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f32"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="0.50" latency="7" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="0.50" latency="7" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="7" ports="1*p23" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23" ports_indexed="1*p23" uops="1" uops_indexed="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p23" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p23" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p23" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="1">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="{load} VMOVUPS" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVUPS" iform="VMOVUPS_YMMqq_YMMqq_10" isa-set="AVX" string="VMOVUPS_10 (YMM, YMM)" summary="Move Unaligned Packed Single-Precision Floating-Point Values" url="uops.info/html-instr/VMOVUPS_10_YMM_YMM.html" url-ref="felixcloutier.com/x86/MOVUPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.25" TP_no_interiteration="0.24" latency="1" uops="0" version="2.1"/>
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <measurement TP_loop="0.75" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.75" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.25" TP_no_interiteration="0.24" latency="1" uops="0" version="2.1"/>
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.56" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.62" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.56" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.62" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.20" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.20" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.20" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="2">
          <latency cycles="3" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="FPU" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.25" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.17" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FP0/1/2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMOVUPS" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVUPS" iform="VMOVUPS_MEMqq_YMMqq" isa-set="AVX" string="VMOVUPS (M256, YMM)" summary="Move Unaligned Packed Single-Precision Floating-Point Values" url="uops.info/html-instr/VMOVUPS_M256_YMM.html" url-ref="felixcloutier.com/x86/MOVUPS.html" vex="1">
      <operand idx="1" memory-prefix="ymmword ptr" name="MEM0" type="mem" w="1" width="256" xtype="f32"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" latency="5" ports="1*p23+2*p4" ports_indexed="1*p23+2*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p4" ports_indexed="1*p23+2*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p4" ports_indexed="1*p23+2*p4" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="2.00" TP_loop_indexed="2.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="2.00" TP_unrolled_indexed="2.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" latency="5" ports="1*p23+2*p4" ports_indexed="1*p23+2*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p4" ports_indexed="1*p23+2*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p4" ports_indexed="1*p23+2*p4" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="2.00" TP_loop_indexed="2.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="2.00" TP_unrolled_indexed="2.00" ports="1*p23+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.91" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.90" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.92" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.92" TP_indexed="0.84" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p237+1*p4" ports_indexed="1*p23+1*p4" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p49+1*p78" uops="2" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="8" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP45" uops="1">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="9" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="{store} VMOVUPS" category="DATAXFER" cpl="3" extension="AVX" iclass="VMOVUPS" iform="VMOVUPS_YMMqq_YMMqq_11" isa-set="AVX" string="VMOVUPS_11 (YMM, YMM)" summary="Move Unaligned Packed Single-Precision Floating-Point Values" url="uops.info/html-instr/VMOVUPS_11_YMM_YMM.html" url-ref="felixcloutier.com/x86/MOVUPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.25" TP_no_interiteration="0.24" latency="1" uops="0" version="2.1"/>
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <measurement TP_loop="0.75" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.75" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.25" TP_no_interiteration="0.24" latency="1" uops="0" version="2.1"/>
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.56" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.62" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.25" TP_no_interiteration="0.24" uops="1" version="2.2"/>
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.56" TP_loop_same_reg="1.00" TP_ports_same_reg="1.00" TP_unrolled="0.62" TP_unrolled_same_reg="1.00" ports_same_reg="1*p5" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.24" uops="1" version="2.3"/>
        <IACA TP="0.24" uops="1" version="3.0"/>
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.25" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.20" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.20" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.20" TP_loop_same_reg="1.00" TP_ports_same_reg="0.33" TP_unrolled="0.25" TP_unrolled_same_reg="1.00" ports_same_reg="1*p015" uops="0" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="1">
          <latency cycles="0" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="2">
          <latency cycles="3" cycles_same_reg="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="FPU" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.25" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.17" TP_unrolled="0.25" uops="1">
          <latency cycles="0" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FP0/1/2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMPSADBW" category="AVX" cpl="3" extension="AVX" iclass="VMPSADBW" iform="VMPSADBW_XMMdq_XMMdq_MEMdq_IMMb" isa-set="AVX" string="VMPSADBW (XMM, XMM, M128, I8)" summary="Compute Multiple Packed Sums of Absolute Difference" url="uops.info/html-instr/VMPSADBW_XMM_XMM_M128_I8.html" url-ref="felixcloutier.com/x86/MPSADBW.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="u8"/>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="11" ports="1*p0+2*p15+1*p23" uops="4" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+2*p15+1*p23" ports_indexed="1*p0+2*p15+1*p23" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+2*p15+1*p23" ports_indexed="1*p0+2*p15+1*p23" uops="4" uops_indexed="4" version="2.3"/>
        <measurement TP_loop="1.02" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+2*p15+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="11" ports="1*p0+2*p15+1*p23" uops="4" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+2*p15+1*p23" ports_indexed="1*p0+2*p15+1*p23" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+2*p15+1*p23" ports_indexed="1*p0+2*p15+1*p23" uops="4" uops_indexed="4" version="2.3"/>
        <measurement TP_loop="1.02" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+2*p15+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="13" ports="1*p0+1*p23+2*p5" uops="4" version="2.1"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p0+1*p23+2*p5" ports_indexed="1*p0+1*p23+2*p5" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p0+1*p23+2*p5" ports_indexed="1*p0+1*p23+2*p5" uops="4" uops_indexed="4" version="2.3"/>
        <IACA TP="2.00" TP_ports="2.00" ports="1*p0+1*p23+2*p5" uops="4" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p0+1*p23+2*p5" ports_indexed="1*p0+1*p23+2*p5" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p0+1*p23+2*p5" ports_indexed="1*p0+1*p23+2*p5" uops="4" uops_indexed="4" version="2.3"/>
        <IACA TP="2.00" TP_ports="2.00" ports="1*p0+1*p23+2*p5" uops="4" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="2.00" TP_ports="2.00" ports="1*p23+2*p5" uops="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="2.00" TP_ports="2.00" ports="1*p23+2*p5" uops="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p15+1*p23+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p15+1*p23+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p15+1*p23+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="3.00" TP_unrolled="3.00" uops="6">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="3.00" TP_unrolled="3.00" uops="6">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="3.00" TP_ports="1.00" TP_unrolled="3.00" ports="1*FP1+1*FP12+1*FP3" uops="6">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMPSADBW" category="AVX" cpl="3" extension="AVX" iclass="VMPSADBW" iform="VMPSADBW_XMMdq_XMMdq_XMMdq_IMMb" isa-set="AVX" string="VMPSADBW (XMM, XMM, XMM, I8)" summary="Compute Multiple Packed Sums of Absolute Difference" url="uops.info/html-instr/VMPSADBW_XMM_XMM_XMM_I8.html" url-ref="felixcloutier.com/x86/MPSADBW.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="5" ports="1*p0+2*p15" uops="3" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+2*p15" uops="3" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+2*p15" uops="3" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+2*p15" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="5" ports="1*p0+2*p15" uops="3" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+2*p15" uops="3" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+2*p15" uops="3" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+2*p15" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="7" ports="1*p0+2*p5" uops="3" version="2.1"/>
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="1*p0+2*p5" uops="3" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="1*p0+2*p5" uops="3" version="2.3"/>
        <IACA TP="1.95" TP_ports="2.00" ports="1*p0+2*p5" uops="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="1*p0+2*p5" uops="3" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="1*p0+2*p5" uops="3" version="2.3"/>
        <IACA TP="1.94" TP_ports="2.00" ports="1*p0+2*p5" uops="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="2.00" TP_ports="2.00" ports="2*p5" uops="2" version="2.3"/>
        <IACA TP="1.96" TP_ports="2.00" ports="2*p5" uops="2" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="3" complex_decoder="1" ports="2*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="2.00" TP_ports="2.00" ports="2*p5" uops="2" version="2.3"/>
        <IACA TP="1.96" TP_ports="2.00" ports="2*p5" uops="2" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="3" complex_decoder="1" ports="2*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="3" complex_decoder="1" ports="2*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="3" complex_decoder="1" ports="2*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="3" complex_decoder="1" ports="2*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="3" complex_decoder="1" ports="2*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p15+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p15+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p15+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="2.00" latency="4.5" uops="ucode"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="ucode" latency="ucode" uops="ucode"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="2.00" TP_ports="1.00" TP_unrolled="2.00" ports="1*FP03+1*FP1+1*FP12+1*FP23" uops="4">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="ucode" latency="ucode" uops="ucode"/>
      </architecture>
    </instruction>
    <instruction asm="VMULPD" category="AVX" cpl="3" extension="AVX" iclass="VMULPD" iform="VMULPD_XMMdq_XMMdq_MEMdq" isa-set="AVX" mxcsr="1" string="VMULPD (XMM, XMM, M128)" summary="Multiply Packed Double-Precision Floating-Point Values" url="uops.info/html-instr/VMULPD_XMM_XMM_M128.html" url-ref="felixcloutier.com/x86/MULPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f64"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="11" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p23" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p23" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="11" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p23" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p23" uops="2" version="2.3"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="11" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.54" TP_unrolled_indexed="0.54" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.55" TP_unrolled_indexed="0.55" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="10.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMULPD" category="AVX" cpl="3" extension="AVX" iclass="VMULPD" iform="VMULPD_XMMdq_XMMdq_XMMdq" isa-set="AVX" mxcsr="1" string="VMULPD (XMM, XMM, XMM)" summary="Multiply Packed Double-Precision Floating-Point Values" url="uops.info/html-instr/VMULPD_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/MULPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="5" ports="1*p0" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="5" ports="1*p0" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="5" ports="1*p01" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="4" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="3" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="3" ports="FP0/1" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMULPD" category="AVX" cpl="3" extension="AVX" iclass="VMULPD" iform="VMULPD_YMMqq_YMMqq_MEMqq" isa-set="AVX" mxcsr="1" string="VMULPD (YMM, YMM, M256)" summary="Multiply Packed Double-Precision Floating-Point Values" url="uops.info/html-instr/VMULPD_YMM_YMM_M256.html" url-ref="felixcloutier.com/x86/MULPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f64"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="12" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p23" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p23" uops="2" version="2.3"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="12" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p23" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p23" uops="2" version="2.3"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="12" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.55" TP_unrolled_indexed="0.55" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.55" TP_unrolled_indexed="0.55" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="11.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMULPD" category="AVX" cpl="3" extension="AVX" iclass="VMULPD" iform="VMULPD_YMMqq_YMMqq_YMMqq" isa-set="AVX" mxcsr="1" string="VMULPD (YMM, YMM, YMM)" summary="Multiply Packed Double-Precision Floating-Point Values" url="uops.info/html-instr/VMULPD_YMM_YMM_YMM.html" url-ref="felixcloutier.com/x86/MULPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="5" ports="1*p0" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="5" ports="1*p0" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="5" ports="1*p01" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="4" ports="FP0/1" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="3" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="3" ports="FP0/1" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMULPS" category="AVX" cpl="3" extension="AVX" iclass="VMULPS" iform="VMULPS_XMMdq_XMMdq_MEMdq" isa-set="AVX" mxcsr="1" string="VMULPS (XMM, XMM, M128)" summary="Multiply Packed Single-Precision Floating-Point Values" url="uops.info/html-instr/VMULPS_XMM_XMM_M128.html" url-ref="felixcloutier.com/x86/MULPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f32"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="11" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p23" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p23" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="11" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p23" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p23" uops="2" version="2.3"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="11" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.55" TP_unrolled_indexed="0.54" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.55" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="10.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMULPS" category="AVX" cpl="3" extension="AVX" iclass="VMULPS" iform="VMULPS_XMMdq_XMMdq_XMMdq" isa-set="AVX" mxcsr="1" string="VMULPS (XMM, XMM, XMM)" summary="Multiply Packed Single-Precision Floating-Point Values" url="uops.info/html-instr/VMULPS_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/MULPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="5" ports="1*p0" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="5" ports="1*p0" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="5" ports="1*p01" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="3" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="3" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="3" ports="FP0/1" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMULPS" category="AVX" cpl="3" extension="AVX" iclass="VMULPS" iform="VMULPS_YMMqq_YMMqq_MEMqq" isa-set="AVX" mxcsr="1" string="VMULPS (YMM, YMM, M256)" summary="Multiply Packed Single-Precision Floating-Point Values" url="uops.info/html-instr/VMULPS_YMM_YMM_M256.html" url-ref="felixcloutier.com/x86/MULPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f32"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="12" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p23" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p23" uops="2" version="2.3"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="12" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p23" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p23" uops="2" version="2.3"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="12" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.55" TP_unrolled_indexed="0.54" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.54" TP_unrolled_indexed="0.55" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="11.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMULPS" category="AVX" cpl="3" extension="AVX" iclass="VMULPS" iform="VMULPS_YMMqq_YMMqq_YMMqq" isa-set="AVX" mxcsr="1" string="VMULPS (YMM, YMM, YMM)" summary="Multiply Packed Single-Precision Floating-Point Values" url="uops.info/html-instr/VMULPS_YMM_YMM_YMM.html" url-ref="felixcloutier.com/x86/MULPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="5" ports="1*p0" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="5" ports="1*p0" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="5" ports="1*p01" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="3" ports="FP0/1" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="3" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="3" ports="FP0/1" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMULSD" category="AVX" cpl="3" extension="AVX" iclass="VMULSD" iform="VMULSD_XMMdq_XMMdq_MEMq" isa-set="AVX" mxcsr="1" string="VMULSD (XMM, XMM, M64)" summary="Multiply Scalar Double-Precision Floating-Point Value" url="uops.info/html-instr/VMULSD_XMM_XMM_M64.html" url-ref="felixcloutier.com/x86/MULSD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="f64"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="11" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p23" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p23" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="11" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p23" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p23" uops="2" version="2.3"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="11" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.55" TP_unrolled_indexed="0.55" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.55" TP_unrolled_indexed="0.55" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="10.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMULSD" category="AVX" cpl="3" extension="AVX" iclass="VMULSD" iform="VMULSD_XMMdq_XMMdq_XMMq" isa-set="AVX" mxcsr="1" string="VMULSD (XMM, XMM, XMM)" summary="Multiply Scalar Double-Precision Floating-Point Value" url="uops.info/html-instr/VMULSD_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/MULSD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="64" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="5" ports="1*p0" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="5" ports="1*p0" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="5" ports="1*p01" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="4" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="3" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="3" ports="FP0/1" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VMULSS" category="AVX" cpl="3" extension="AVX" iclass="VMULSS" iform="VMULSS_XMMdq_XMMdq_MEMd" isa-set="AVX" mxcsr="1" string="VMULSS (XMM, XMM, M32)" summary="Multiply Scalar Single-Precision Floating-Point Values" url="uops.info/html-instr/VMULSS_XMM_XMM_M32.html" url-ref="felixcloutier.com/x86/MULSS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="dword ptr" name="MEM0" r="1" type="mem" width="32" xtype="f32"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="11" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="11" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="11" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.55" TP_unrolled_indexed="0.54" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.54" TP_unrolled_indexed="0.54" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="10.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VMULSS" category="AVX" cpl="3" extension="AVX" iclass="VMULSS" iform="VMULSS_XMMdq_XMMdq_XMMd" isa-set="AVX" mxcsr="1" string="VMULSS (XMM, XMM, XMM)" summary="Multiply Scalar Single-Precision Floating-Point Values" url="uops.info/html-instr/VMULSS_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/MULSS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="32" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="5" ports="1*p0" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="5" ports="1*p0" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="5" ports="1*p01" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="4.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="3" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="3" ports="FP0/1" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="3" ports="FP0/1" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VORPD" category="LOGICAL_FP" cpl="3" extension="AVX" iclass="VORPD" iform="VORPD_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VORPD (XMM, XMM, M128)" summary="Bitwise Logical OR of Packed Double Precision Floating-Point Values" url="uops.info/html-instr/VORPD_XMM_XMM_M128.html" url-ref="felixcloutier.com/x86/ORPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="u64"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="7" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="7" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="7" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p015+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VORPD" category="LOGICAL_FP" cpl="3" extension="AVX" iclass="VORPD" iform="VORPD_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VORPD (XMM, XMM, XMM)" summary="Bitwise Logical OR of Packed Double Precision Floating-Point Values" url="uops.info/html-instr/VORPD_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/ORPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="u64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="3.0"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="3.0"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.33" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.25" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.25" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.25" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FP0/1/2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VORPD" category="LOGICAL_FP" cpl="3" extension="AVX" iclass="VORPD" iform="VORPD_YMMqq_YMMqq_MEMqq" isa-set="AVX" string="VORPD (YMM, YMM, M256)" summary="Bitwise Logical OR of Packed Double Precision Floating-Point Values" url="uops.info/html-instr/VORPD_YMM_YMM_M256.html" url-ref="felixcloutier.com/x86/ORPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="u64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="u64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="u64"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="8" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="8" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="8" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p015+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="8.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VORPD" category="LOGICAL_FP" cpl="3" extension="AVX" iclass="VORPD" iform="VORPD_YMMqq_YMMqq_YMMqq" isa-set="AVX" string="VORPD (YMM, YMM, YMM)" summary="Bitwise Logical OR of Packed Double Precision Floating-Point Values" url="uops.info/html-instr/VORPD_YMM_YMM_YMM.html" url-ref="felixcloutier.com/x86/ORPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="u64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="u64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="256" xtype="u64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="3.0"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="3.0"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.33" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.25" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.25" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FP0/1/2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VORPS" category="LOGICAL_FP" cpl="3" extension="AVX" iclass="VORPS" iform="VORPS_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VORPS (XMM, XMM, M128)" summary="Bitwise Logical OR of Packed Single Precision Floating-Point Values" url="uops.info/html-instr/VORPS_XMM_XMM_M128.html" url-ref="felixcloutier.com/x86/ORPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="u32"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="7" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="7" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="7" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p015+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VORPS" category="LOGICAL_FP" cpl="3" extension="AVX" iclass="VORPS" iform="VORPS_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VORPS (XMM, XMM, XMM)" summary="Bitwise Logical OR of Packed Single Precision Floating-Point Values" url="uops.info/html-instr/VORPS_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/ORPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="u32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="3.0"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="3.0"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.33" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.25" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.25" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.25" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FP0/1/2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VORPS" category="LOGICAL_FP" cpl="3" extension="AVX" iclass="VORPS" iform="VORPS_YMMqq_YMMqq_MEMqq" isa-set="AVX" string="VORPS (YMM, YMM, M256)" summary="Bitwise Logical OR of Packed Single Precision Floating-Point Values" url="uops.info/html-instr/VORPS_YMM_YMM_M256.html" url-ref="felixcloutier.com/x86/ORPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="u32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="u32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="u32"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="8" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="8" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="8" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p015+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="8.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VORPS" category="LOGICAL_FP" cpl="3" extension="AVX" iclass="VORPS" iform="VORPS_YMMqq_YMMqq_YMMqq" isa-set="AVX" string="VORPS (YMM, YMM, YMM)" summary="Bitwise Logical OR of Packed Single Precision Floating-Point Values" url="uops.info/html-instr/VORPS_YMM_YMM_YMM.html" url-ref="felixcloutier.com/x86/ORPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="u32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="u32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="256" xtype="u32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="3.0"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="3.0"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.33" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_unrolled="0.50" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.25" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.25" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FP0/1/2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VPABSB" category="AVX" cpl="3" extension="AVX" iclass="VPABSB" iform="VPABSB_XMMdq_MEMdq" isa-set="AVX" string="VPABSB (XMM, M128)" summary="Packed Absolute Value" url="uops.info/html-instr/VPABSB_XMM_M128.html" url-ref="felixcloutier.com/x86/PABSB:PABSW:PABSD:PABSQ.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i8"/>
      <architecture name="SNB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP03" uops="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP03" uops="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPABSB" category="AVX" cpl="3" extension="AVX" iclass="VPABSB" iform="VPABSB_XMMdq_XMMdq" isa-set="AVX" string="VPABSB (XMM, XMM)" summary="Packed Absolute Value" url="uops.info/html-instr/VPABSB_XMM_XMM.html" url-ref="felixcloutier.com/x86/PABSB:PABSW:PABSD:PABSQ.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p15" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p15" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP03" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1/3" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP03" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.33" latency="1" ports="FP0/1/3" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VPABSD" category="AVX" cpl="3" extension="AVX" iclass="VPABSD" iform="VPABSD_XMMdq_MEMdq" isa-set="AVX" string="VPABSD (XMM, M128)" summary="Packed Absolute Value" url="uops.info/html-instr/VPABSD_XMM_M128.html" url-ref="felixcloutier.com/x86/PABSB:PABSW:PABSD:PABSQ.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i32"/>
      <architecture name="SNB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP03" uops="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP03" uops="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPABSD" category="AVX" cpl="3" extension="AVX" iclass="VPABSD" iform="VPABSD_XMMdq_XMMdq" isa-set="AVX" string="VPABSD (XMM, XMM)" summary="Packed Absolute Value" url="uops.info/html-instr/VPABSD_XMM_XMM.html" url-ref="felixcloutier.com/x86/PABSB:PABSW:PABSD:PABSQ.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p15" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p15" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP03" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1/3" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP03" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.33" latency="1" ports="FP0/1/3" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VPABSW" category="AVX" cpl="3" extension="AVX" iclass="VPABSW" iform="VPABSW_XMMdq_MEMdq" isa-set="AVX" string="VPABSW (XMM, M128)" summary="Packed Absolute Value" url="uops.info/html-instr/VPABSW_XMM_M128.html" url-ref="felixcloutier.com/x86/PABSB:PABSW:PABSD:PABSQ.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i16"/>
      <architecture name="SNB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP03" uops="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP03" uops="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPABSW" category="AVX" cpl="3" extension="AVX" iclass="VPABSW" iform="VPABSW_XMMdq_XMMdq" isa-set="AVX" string="VPABSW (XMM, XMM)" summary="Packed Absolute Value" url="uops.info/html-instr/VPABSW_XMM_XMM.html" url-ref="felixcloutier.com/x86/PABSB:PABSW:PABSD:PABSQ.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p15" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p15" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP03" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1/3" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP03" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.33" latency="1" ports="FP0/1/3" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VPACKSSDW" category="AVX" cpl="3" extension="AVX" iclass="VPACKSSDW" iform="VPACKSSDW_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VPACKSSDW (XMM, XMM, M128)" summary="Pack with Signed Saturation" url="uops.info/html-instr/VPACKSSDW_XMM_XMM_M128.html" url-ref="felixcloutier.com/x86/PACKSSWB:PACKSSDW.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i32"/>
      <architecture name="SNB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="7" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.0" latency="9.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPACKSSDW" category="AVX" cpl="3" extension="AVX" iclass="VPACKSSDW" iform="VPACKSSDW_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VPACKSSDW (XMM, XMM, XMM)" summary="Pack with Signed Saturation" url="uops.info/html-instr/VPACKSSDW_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/PACKSSWB:PACKSSDW.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.0" latency="3.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP1/2" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP1/2" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP1/2" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VPACKSSWB" category="AVX" cpl="3" extension="AVX" iclass="VPACKSSWB" iform="VPACKSSWB_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VPACKSSWB (XMM, XMM, M128)" summary="Pack with Signed Saturation" url="uops.info/html-instr/VPACKSSWB_XMM_XMM_M128.html" url-ref="felixcloutier.com/x86/PACKSSWB:PACKSSDW.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i16"/>
      <architecture name="SNB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="7" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.0" latency="9.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPACKSSWB" category="AVX" cpl="3" extension="AVX" iclass="VPACKSSWB" iform="VPACKSSWB_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VPACKSSWB (XMM, XMM, XMM)" summary="Pack with Signed Saturation" url="uops.info/html-instr/VPACKSSWB_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/PACKSSWB:PACKSSDW.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.0" latency="3.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP1/2" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP1/2" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP1/2" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VPACKUSDW" category="AVX" cpl="3" extension="AVX" iclass="VPACKUSDW" iform="VPACKUSDW_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VPACKUSDW (XMM, XMM, M128)" summary="Pack with Unsigned Saturation" url="uops.info/html-instr/VPACKUSDW_XMM_XMM_M128.html" url-ref="felixcloutier.com/x86/PACKUSDW.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i32"/>
      <architecture name="SNB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="7" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.0" latency="9.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPACKUSDW" category="AVX" cpl="3" extension="AVX" iclass="VPACKUSDW" iform="VPACKUSDW_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VPACKUSDW (XMM, XMM, XMM)" summary="Pack with Unsigned Saturation" url="uops.info/html-instr/VPACKUSDW_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/PACKUSDW.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.0" latency="3.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP1/2" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP1/2" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP1/2" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VPACKUSWB" category="AVX" cpl="3" extension="AVX" iclass="VPACKUSWB" iform="VPACKUSWB_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VPACKUSWB (XMM, XMM, M128)" summary="Pack with Unsigned Saturation" url="uops.info/html-instr/VPACKUSWB_XMM_XMM_M128.html" url-ref="felixcloutier.com/x86/PACKUSWB.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i16"/>
      <architecture name="SNB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="7" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.0" latency="9.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPACKUSWB" category="AVX" cpl="3" extension="AVX" iclass="VPACKUSWB" iform="VPACKUSWB_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VPACKUSWB (XMM, XMM, XMM)" summary="Pack with Unsigned Saturation" url="uops.info/html-instr/VPACKUSWB_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/PACKUSWB.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.0" latency="3.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP1/2" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP1/2" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP1/2" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VPADDB" category="AVX" cpl="3" extension="AVX" iclass="VPADDB" iform="VPADDB_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VPADDB (XMM, XMM, M128)" summary="Add Packed Integers" url="uops.info/html-instr/VPADDB_XMM_XMM_M128.html" url-ref="felixcloutier.com/x86/PADDB:PADDW:PADDD:PADDQ.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i8"/>
      <architecture name="SNB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p015+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p015+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.33" TP_unrolled="0.50" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.33" TP_unrolled="0.50" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPADDB" category="AVX" cpl="3" extension="AVX" iclass="VPADDB" iform="VPADDB_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VPADDB (XMM, XMM, XMM)" summary="Add Packed Integers" url="uops.info/html-instr/VPADDB_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/PADDB:PADDW:PADDD:PADDQ.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="i8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p15" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p15" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="3.0"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="3.0"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.33" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.33" latency="1" ports="FP0/1/3" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.33" latency="1" ports="FP0/1/3" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.25" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FP0/1/2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VPADDD" category="AVX" cpl="3" extension="AVX" iclass="VPADDD" iform="VPADDD_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VPADDD (XMM, XMM, M128)" summary="Add Packed Integers" url="uops.info/html-instr/VPADDD_XMM_XMM_M128.html" url-ref="felixcloutier.com/x86/PADDB:PADDW:PADDD:PADDQ.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i32"/>
      <architecture name="SNB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p015+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p015+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.33" TP_unrolled="0.50" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.33" TP_unrolled="0.50" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPADDD" category="AVX" cpl="3" extension="AVX" iclass="VPADDD" iform="VPADDD_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VPADDD (XMM, XMM, XMM)" summary="Add Packed Integers" url="uops.info/html-instr/VPADDD_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/PADDB:PADDW:PADDD:PADDQ.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p15" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p15" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="3.0"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="3.0"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.33" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.33" TP_loop_same_reg="0.33" TP_ports_same_reg="0.33" TP_unrolled="0.33" TP_unrolled_same_reg="0.33" ports_same_reg="1*FP013" uops="1" uops_same_reg="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.33" latency="1" ports="FP0/1/3" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.33" latency="1" ports="FP0/1/3" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.25" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FP0/1/2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VPADDQ" category="AVX" cpl="3" extension="AVX" iclass="VPADDQ" iform="VPADDQ_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VPADDQ (XMM, XMM, M128)" summary="Add Packed Integers" url="uops.info/html-instr/VPADDQ_XMM_XMM_M128.html" url-ref="felixcloutier.com/x86/PADDB:PADDW:PADDD:PADDQ.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i64"/>
      <architecture name="SNB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p015+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p015+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.33" TP_unrolled="0.50" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.33" TP_unrolled="0.50" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPADDQ" category="AVX" cpl="3" extension="AVX" iclass="VPADDQ" iform="VPADDQ_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VPADDQ (XMM, XMM, XMM)" summary="Add Packed Integers" url="uops.info/html-instr/VPADDQ_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/PADDB:PADDW:PADDD:PADDQ.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="i64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p15" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p15" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="3.0"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="3.0"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.33" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.33" latency="1" ports="FP0/1/3" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.33" latency="1" ports="FP0/1/3" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.25" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FP0/1/2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VPADDSB" category="AVX" cpl="3" extension="AVX" iclass="VPADDSB" iform="VPADDSB_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VPADDSB (XMM, XMM, M128)" summary="Add Packed Signed Integers with Signed Saturation" url="uops.info/html-instr/VPADDSB_XMM_XMM_M128.html" url-ref="felixcloutier.com/x86/PADDSB:PADDSW.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i8"/>
      <architecture name="SNB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="4" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP03" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP03" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPADDSB" category="AVX" cpl="3" extension="AVX" iclass="VPADDSB" iform="VPADDSB_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VPADDSB (XMM, XMM, XMM)" summary="Add Packed Signed Integers with Signed Saturation" url="uops.info/html-instr/VPADDSB_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/PADDSB:PADDSW.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="i8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p15" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p15" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP03" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.33" latency="1" ports="FP0/1/3" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP03" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.33" latency="1" ports="FP0/1/3" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VPADDSW" category="AVX" cpl="3" extension="AVX" iclass="VPADDSW" iform="VPADDSW_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VPADDSW (XMM, XMM, M128)" summary="Add Packed Signed Integers with Signed Saturation" url="uops.info/html-instr/VPADDSW_XMM_XMM_M128.html" url-ref="felixcloutier.com/x86/PADDSB:PADDSW.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i16"/>
      <architecture name="SNB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP03" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP03" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPADDSW" category="AVX" cpl="3" extension="AVX" iclass="VPADDSW" iform="VPADDSW_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VPADDSW (XMM, XMM, XMM)" summary="Add Packed Signed Integers with Signed Saturation" url="uops.info/html-instr/VPADDSW_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/PADDSB:PADDSW.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p15" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p15" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP03" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.33" latency="1" ports="FP0/1/3" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP03" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.33" latency="1" ports="FP0/1/3" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VPADDUSB" category="AVX" cpl="3" extension="AVX" iclass="VPADDUSB" iform="VPADDUSB_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VPADDUSB (XMM, XMM, M128)" summary="Add Packed Unsigned Integers with Unsigned Saturation" url="uops.info/html-instr/VPADDUSB_XMM_XMM_M128.html" url-ref="felixcloutier.com/x86/PADDUSB:PADDUSW.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="u8"/>
      <architecture name="SNB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP03" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP03" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPADDUSB" category="AVX" cpl="3" extension="AVX" iclass="VPADDUSB" iform="VPADDUSB_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VPADDUSB (XMM, XMM, XMM)" summary="Add Packed Unsigned Integers with Unsigned Saturation" url="uops.info/html-instr/VPADDUSB_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/PADDUSB:PADDUSW.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p15" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p15" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP03" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.33" latency="1" ports="FP0/1/3" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP03" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.33" latency="1" ports="FP0/1/3" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VPADDUSW" category="AVX" cpl="3" extension="AVX" iclass="VPADDUSW" iform="VPADDUSW_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VPADDUSW (XMM, XMM, M128)" summary="Add Packed Unsigned Integers with Unsigned Saturation" url="uops.info/html-instr/VPADDUSW_XMM_XMM_M128.html" url-ref="felixcloutier.com/x86/PADDUSB:PADDUSW.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="u16"/>
      <architecture name="SNB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP03" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP03" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPADDUSW" category="AVX" cpl="3" extension="AVX" iclass="VPADDUSW" iform="VPADDUSW_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VPADDUSW (XMM, XMM, XMM)" summary="Add Packed Unsigned Integers with Unsigned Saturation" url="uops.info/html-instr/VPADDUSW_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/PADDUSB:PADDUSW.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="u16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p15" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p15" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP03" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.33" latency="1" ports="FP0/1/3" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP03" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.33" latency="1" ports="FP0/1/3" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VPADDW" category="AVX" cpl="3" extension="AVX" iclass="VPADDW" iform="VPADDW_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VPADDW (XMM, XMM, M128)" summary="Add Packed Integers" url="uops.info/html-instr/VPADDW_XMM_XMM_M128.html" url-ref="felixcloutier.com/x86/PADDB:PADDW:PADDD:PADDQ.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i16"/>
      <architecture name="SNB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p015+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p015+1*p23" uops="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.33" TP_unrolled="0.50" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.33" TP_unrolled="0.50" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPADDW" category="AVX" cpl="3" extension="AVX" iclass="VPADDW" iform="VPADDW_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VPADDW (XMM, XMM, XMM)" summary="Add Packed Integers" url="uops.info/html-instr/VPADDW_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/PADDB:PADDW:PADDD:PADDQ.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p15" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p15" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="3.0"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="3.0"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.33" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.33" latency="1" ports="FP0/1/3" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.33" latency="1" ports="FP0/1/3" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.25" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FP0/1/2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VPALIGNR" category="AVX" cpl="3" extension="AVX" iclass="VPALIGNR" iform="VPALIGNR_XMMdq_XMMdq_MEMdq_IMMb" isa-set="AVX" string="VPALIGNR (XMM, XMM, M128, I8)" summary="Packed Align Right" url="uops.info/html-instr/VPALIGNR_XMM_XMM_M128_I8.html" url-ref="felixcloutier.com/x86/PALIGNR.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="u8"/>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="7" ports="1*p15+1*p23" uops="2" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p15+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="7" ports="1*p15+1*p23" uops="2" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15+1*p23" uops="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p15+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="7" ports="1*p23+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.0" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPALIGNR" category="AVX" cpl="3" extension="AVX" iclass="VPALIGNR" iform="VPALIGNR_XMMdq_XMMdq_XMMdq_IMMb" isa-set="AVX" string="VPALIGNR (XMM, XMM, XMM, I8)" summary="Packed Align Right" url="uops.info/html-instr/VPALIGNR_XMM_XMM_XMM_I8.html" url-ref="felixcloutier.com/x86/PALIGNR.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.0" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP1/2" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP1/2" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP1/2" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VPAND" category="LOGICAL" cpl="3" extension="AVX" iclass="VPAND" iform="VPAND_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VPAND (XMM, XMM, M128)" summary="Logical AND" url="uops.info/html-instr/VPAND_XMM_XMM_M128.html" url-ref="felixcloutier.com/x86/PAND.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u128">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u128">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="u128"/>
      <architecture name="SNB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p015+1*p23" uops="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p015+1*p23" uops="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p015+1*p23" uops="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p015+1*p23" uops="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPAND" category="LOGICAL" cpl="3" extension="AVX" iclass="VPAND" iform="VPAND_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VPAND (XMM, XMM, XMM)" summary="Logical AND" url="uops.info/html-instr/VPAND_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/PAND.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u128">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u128">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="u128">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="0.34" TP_no_interiteration="0.35" TP_ports="0.33" latency="1" ports="1*p015" uops="1" version="2.1"/>
        <IACA TP="0.34" TP_no_interiteration="0.35" TP_ports="0.33" ports="1*p015" uops="1" version="2.2"/>
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.34" TP_no_interiteration="0.35" TP_ports="0.33" latency="1" ports="1*p015" uops="1" version="2.1"/>
        <IACA TP="0.34" TP_no_interiteration="0.35" TP_ports="0.33" ports="1*p015" uops="1" version="2.2"/>
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.34" TP_no_interiteration="0.35" TP_ports="0.33" latency="1" ports="1*p015" uops="1" version="2.1"/>
        <IACA TP="0.34" TP_no_interiteration="0.35" TP_ports="0.33" ports="1*p015" uops="1" version="2.2"/>
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="3.0"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.34" TP_no_interiteration="0.35" TP_ports="0.33" ports="1*p015" uops="1" version="2.2"/>
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="3.0"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="3.0"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="3.0"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.33" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.25" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.25" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.25" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FP0/1/2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VPANDN" category="LOGICAL" cpl="3" extension="AVX" iclass="VPANDN" iform="VPANDN_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VPANDN (XMM, XMM, M128)" summary="Logical AND NOT" url="uops.info/html-instr/VPANDN_XMM_XMM_M128.html" url-ref="felixcloutier.com/x86/PANDN.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u128">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u128">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="u128"/>
      <architecture name="SNB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p015+1*p23" uops="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p015+1*p23" uops="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p015+1*p23" uops="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p015+1*p23" uops="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPANDN" category="LOGICAL" cpl="3" extension="AVX" iclass="VPANDN" iform="VPANDN_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VPANDN (XMM, XMM, XMM)" summary="Logical AND NOT" url="uops.info/html-instr/VPANDN_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/PANDN.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u128">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u128">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="u128">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="0.34" TP_no_interiteration="0.35" TP_ports="0.33" latency="1" ports="1*p015" uops="1" version="2.1"/>
        <IACA TP="0.34" TP_no_interiteration="0.35" TP_ports="0.33" ports="1*p015" uops="1" version="2.2"/>
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.34" TP_no_interiteration="0.35" TP_ports="0.33" latency="1" ports="1*p015" uops="1" version="2.1"/>
        <IACA TP="0.34" TP_no_interiteration="0.35" TP_ports="0.33" ports="1*p015" uops="1" version="2.2"/>
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.34" TP_no_interiteration="0.35" TP_ports="0.33" latency="1" ports="1*p015" uops="1" version="2.1"/>
        <IACA TP="0.34" TP_no_interiteration="0.35" TP_ports="0.33" ports="1*p015" uops="1" version="2.2"/>
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="3.0"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.34" TP_no_interiteration="0.35" TP_ports="0.33" ports="1*p015" uops="1" version="2.2"/>
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="3.0"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="3.0"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="3.0"/>
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.33" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*p015" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.25" TP_ports="0.25" TP_unrolled="0.25" TP_unrolled_same_reg="0.25" ports="1*FP0123" uops="1" uops_same_reg="1">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.25" TP_ports="0.25" TP_unrolled="0.25" TP_unrolled_same_reg="0.25" ports="1*FP0123" uops="1" uops_same_reg="1">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FPU" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.17" TP_ports="0.25" TP_unrolled="0.25" TP_unrolled_same_reg="0.25" ports="1*FP0123" uops="1" uops_same_reg="1">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FP0/1/2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VPAVGB" category="AVX" cpl="3" extension="AVX" iclass="VPAVGB" iform="VPAVGB_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VPAVGB (XMM, XMM, M128)" summary="Average Packed Integers" url="uops.info/html-instr/VPAVGB_XMM_XMM_M128.html" url-ref="felixcloutier.com/x86/PAVGB:PAVGW.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="u8"/>
      <architecture name="SNB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP03" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP03" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPAVGB" category="AVX" cpl="3" extension="AVX" iclass="VPAVGB" iform="VPAVGB_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VPAVGB (XMM, XMM, XMM)" summary="Average Packed Integers" url="uops.info/html-instr/VPAVGB_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/PAVGB:PAVGW.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p15" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p15" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP03" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/3" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP03" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/3" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VPAVGW" category="AVX" cpl="3" extension="AVX" iclass="VPAVGW" iform="VPAVGW_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VPAVGW (XMM, XMM, M128)" summary="Average Packed Integers" url="uops.info/html-instr/VPAVGW_XMM_XMM_M128.html" url-ref="felixcloutier.com/x86/PAVGB:PAVGW.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="u16"/>
      <architecture name="SNB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP03" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP03" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPAVGW" category="AVX" cpl="3" extension="AVX" iclass="VPAVGW" iform="VPAVGW_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VPAVGW (XMM, XMM, XMM)" summary="Average Packed Integers" url="uops.info/html-instr/VPAVGW_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/PAVGB:PAVGW.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="u16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p15" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p15" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP03" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/3" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP03" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/3" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VPBLENDVB" category="AVX" cpl="3" extension="AVX" iclass="VPBLENDVB" iform="VPBLENDVB_XMMdq_XMMdq_MEMdq_XMMdq" isa-set="AVX" string="VPBLENDVB (XMM, XMM, M128, XMM)" summary="Variable Blend Packed Bytes" url="uops.info/html-instr/VPBLENDVB_XMM_XMM_M128_XMM.html" url-ref="felixcloutier.com/x86/PBLENDVB.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i8"/>
      <operand idx="4" name="REG2" r="1" type="reg" width="128" xtype="i8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="8" ports="2*p15+1*p23" uops="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="2*p15+1*p23" ports_indexed="2*p15+1*p23" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="2*p15+1*p23" ports_indexed="2*p15+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="2*p15+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="8" ports="2*p15+1*p23" uops="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="2*p15+1*p23" ports_indexed="2*p15+1*p23" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="2*p15+1*p23" ports_indexed="2*p15+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="2*p15+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="9" ports="1*p23+2*p5" uops="3" version="2.1"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p23+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p23+2*p5" ports_indexed="1*p23+2*p5" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p23+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.95" TP_indexed="0.95" TP_ports="0.67" TP_ports_indexed="0.67" fusion_occurred="1" ports="2*p015+1*p23" ports_indexed="2*p015+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="1.00" TP_indexed="2.00" TP_ports="0.67" TP_ports_indexed="0.67" fusion_occurred="1" ports="2*p015+1*p23" ports_indexed="2*p015+1*p23" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p015+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.95" TP_indexed="0.95" TP_ports="0.67" TP_ports_indexed="0.67" fusion_occurred="1" ports="2*p015+1*p23" ports_indexed="2*p015+1*p23" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="1.00" TP_indexed="2.00" TP_ports="0.67" TP_ports_indexed="0.67" fusion_occurred="1" ports="2*p015+1*p23" ports_indexed="2*p015+1*p23" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p015+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p015+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p015+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p015+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p015+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p015+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
        <doc TP="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p015+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p015+1*p23" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP0" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP0" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPBLENDVB" category="AVX" cpl="3" extension="AVX" iclass="VPBLENDVB" iform="VPBLENDVB_XMMdq_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VPBLENDVB (XMM, XMM, XMM, XMM)" summary="Variable Blend Packed Bytes" url="uops.info/html-instr/VPBLENDVB_XMM_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/PBLENDVB.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="i8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="4" name="REG3" r="1" type="reg" width="128" xtype="i8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="2" ports="2*p15" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="2*p15" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="2*p15" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p15" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="2" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="2" ports="2*p15" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="2*p15" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="2*p15" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="2*p15" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="2" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="2" ports="2*p5" uops="2" version="2.1"/>
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="2*p5" uops="2" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="2*p5" uops="2" version="2.3"/>
        <IACA TP="1.90" TP_ports="2.00" ports="2*p5" uops="2" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="2*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="2" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="2*p5" uops="2" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="2*p5" uops="2" version="2.3"/>
        <IACA TP="1.90" TP_ports="2.00" ports="2*p5" uops="2" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="2*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="2" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.95" TP_ports="0.67" ports="2*p015" uops="2" version="2.3"/>
        <IACA TP="0.95" TP_ports="0.67" ports="2*p015" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="2*p015" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.95" TP_ports="0.67" ports="2*p015" uops="2" version="2.3"/>
        <IACA TP="0.95" TP_ports="0.67" ports="2*p015" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="2*p015" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="2*p015" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="2*p015" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="2*p015" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="2*p015" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="2*p015" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
        <doc TP="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="2*p015" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="0.67" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="2*p015" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="2" start_op="4" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP0" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="4" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="FP0" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP0" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="4" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="FP0" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
          <latency cycles="1" start_op="4" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VPBLENDW" category="AVX" cpl="3" extension="AVX" iclass="VPBLENDW" iform="VPBLENDW_XMMdq_XMMdq_MEMdq_IMMb" isa-set="AVX" string="VPBLENDW (XMM, XMM, M128, I8)" summary="Blend Packed Words" url="uops.info/html-instr/VPBLENDW_XMM_XMM_M128_I8.html" url-ref="felixcloutier.com/x86/PBLENDW.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="u16"/>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="7" ports="1*p15+1*p23" uops="2" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p15+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="7" ports="1*p15+1*p23" uops="2" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p15+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="7" ports="1*p23+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p15+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p15+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p15+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.33" TP_unrolled="0.50" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.33" TP_unrolled="0.50" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPBLENDW" category="AVX" cpl="3" extension="AVX" iclass="VPBLENDW" iform="VPBLENDW_XMMdq_XMMdq_XMMdq_IMMb" isa-set="AVX" string="VPBLENDW (XMM, XMM, XMM, I8)" summary="Blend Packed Words" url="uops.info/html-instr/VPBLENDW_XMM_XMM_XMM_I8.html" url-ref="felixcloutier.com/x86/PBLENDW.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="u16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.45" TP_ports="0.33" TP_unrolled="0.38" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.33" latency="1" ports="FP0/1/3" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.38" TP_ports="0.33" TP_unrolled="0.38" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.33" latency="1" ports="FP0/1/3" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.37" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FP0/1/2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VPCLMULQDQ" category="AVX" cpl="3" extension="AVX" iclass="VPCLMULQDQ" iform="VPCLMULQDQ_XMMdq_XMMdq_XMMdq_IMMb" isa-set="AVX" string="VPCLMULQDQ (XMM, XMM, XMM, I8)" summary="Carry-Less Multiplication Quadword" url="uops.info/html-instr/VPCLMULQDQ_XMM_XMM_XMM_I8.html" url-ref="felixcloutier.com/x86/PCLMULQDQ.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u128">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="u64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <measurement TP_loop="8.00" TP_ports="6.00" TP_unrolled="8.00" available_simple_decoders="0" complex_decoder="1" ports="2*p0+4*p015+3*p05+4*p1+2*p15+3*p5" uops="18" uops_MITE="2" uops_MS="16" uops_retire_slots="18">
          <latency cycles="12" start_op="2" target_op="1"/>
          <latency cycles="14" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <measurement TP_loop="8.00" TP_ports="6.00" TP_unrolled="8.00" available_simple_decoders="0" complex_decoder="1" ports="2*p0+4*p015+3*p05+4*p1+2*p15+3*p5" uops="18" uops_MITE="2" uops_MS="16" uops_retire_slots="18">
          <latency cycles="12" start_op="2" target_op="1"/>
          <latency cycles="14" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="7" ports="2*p0+1*p5" uops="3" version="2.1"/>
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="2*p0+1*p5" uops="3" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="2*p0+1*p5" uops="3" version="2.3"/>
        <IACA TP="1.95" TP_ports="2.00" ports="2*p0+1*p5" uops="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="2*p0+1*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="7" start_op="2" target_op="1"/>
          <latency cycles="7" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <IACA TP="0.99" TP_ports="1.00" ports="1*p0" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.99" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="7" start_op="2" target_op="1"/>
          <latency cycles="7" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.99" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="7" start_op="2" target_op="1"/>
          <latency cycles="7" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="7" start_op="2" target_op="1"/>
          <latency cycles="7" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="7" start_op="2" target_op="1"/>
          <latency cycles="7" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="7" start_op="2" target_op="1"/>
          <latency cycles="7" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="7" start_op="2" target_op="1"/>
          <latency cycles="7" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.0" latency="6.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles="6" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="ucode"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="ucode"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="2.00" TP_ports="1.00" TP_unrolled="2.00" ports="1*FP0123+1*FP1+1*FP12" uops="4">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="ucode"/>
      </architecture>
    </instruction>
    <instruction asm="VPCLMULQDQ" category="AVX" cpl="3" extension="AVX" iclass="VPCLMULQDQ" iform="VPCLMULQDQ_XMMdq_XMMdq_MEMdq_IMMb" isa-set="AVX" string="VPCLMULQDQ (XMM, XMM, M128, I8)" summary="Carry-Less Multiplication Quadword" url="uops.info/html-instr/VPCLMULQDQ_XMM_XMM_M128_I8.html" url-ref="felixcloutier.com/x86/PCLMULQDQ.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u128">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="u64"/>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <measurement TP_loop="8.00" TP_ports="5.67" TP_unrolled="8.00" available_simple_decoders="0" complex_decoder="1" ports="2*p0+3*p015+3*p05+4*p1+2*p15+1*p23+3*p5" uops="18" uops_MITE="2" uops_MS="16" uops_retire_slots="18">
          <latency cycles="12" start_op="2" target_op="1"/>
          <latency cycles_addr="19" cycles_addr_index="19" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="18" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <measurement TP_loop="8.00" TP_ports="5.67" TP_unrolled="8.00" available_simple_decoders="0" complex_decoder="1" ports="2*p0+3*p015+3*p05+4*p1+2*p15+1*p23+3*p5" uops="18" uops_MITE="2" uops_MS="16" uops_retire_slots="18">
          <latency cycles="12" start_op="2" target_op="1"/>
          <latency cycles_addr="19" cycles_addr_index="19" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="18" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="7" ports="2*p0+1*p5" uops="3" version="2.1"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="2*p0+1*p23+1*p5" ports_indexed="2*p0+1*p23+1*p5" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="2*p0+1*p23+1*p5" ports_indexed="2*p0+1*p23+1*p5" uops="4" uops_indexed="4" version="2.3"/>
        <IACA TP="2.00" TP_ports="2.00" ports="2*p0+1*p23+1*p5" uops="4" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="2*p0+1*p23+1*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="7" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p23" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p0+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="2" target_op="1"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="2" target_op="1"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="2" target_op="1"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="2" target_op="1"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="2" target_op="1"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="7" start_op="2" target_op="1"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="6" start_op="2" target_op="1"/>
          <latency cycles_addr="13" cycles_addr_index="13" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="2.00" TP_ports="1.00" TP_unrolled="2.00" ports="1*FP01+2*FP12" uops="4">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPCMPEQB" category="AVX" cpl="3" extension="AVX" iclass="VPCMPEQB" iform="VPCMPEQB_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VPCMPEQB (XMM, XMM, M128)" summary="Compare Packed Data for Equal" url="uops.info/html-instr/VPCMPEQB_XMM_XMM_M128.html" url-ref="felixcloutier.com/x86/PCMPEQB:PCMPEQW:PCMPEQD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="u8"/>
      <architecture name="SNB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.33" TP_unrolled="0.50" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.33" TP_unrolled="0.50" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPCMPEQB" category="AVX" cpl="3" extension="AVX" iclass="VPCMPEQB" iform="VPCMPEQB_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VPCMPEQB (XMM, XMM, XMM)" summary="Compare Packed Data for Equal" url="uops.info/html-instr/VPCMPEQB_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/PCMPEQB:PCMPEQW:PCMPEQD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p15" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p15" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/3" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/3" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.25" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FP0/1/2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VPCMPEQD" category="AVX" cpl="3" extension="AVX" iclass="VPCMPEQD" iform="VPCMPEQD_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VPCMPEQD (XMM, XMM, M128)" summary="Compare Packed Data for Equal" url="uops.info/html-instr/VPCMPEQD_XMM_XMM_M128.html" url-ref="felixcloutier.com/x86/PCMPEQB:PCMPEQW:PCMPEQD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="u32"/>
      <architecture name="SNB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.33" TP_unrolled="0.50" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.33" TP_unrolled="0.50" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPCMPEQD" category="AVX" cpl="3" extension="AVX" iclass="VPCMPEQD" iform="VPCMPEQD_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VPCMPEQD (XMM, XMM, XMM)" summary="Compare Packed Data for Equal" url="uops.info/html-instr/VPCMPEQD_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/PCMPEQB:PCMPEQW:PCMPEQD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="u32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p15" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p15" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/3" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/3" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.25" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FP0/1/2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VPCMPEQQ" category="AVX" cpl="3" extension="AVX" iclass="VPCMPEQQ" iform="VPCMPEQQ_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VPCMPEQQ (XMM, XMM, M128)" summary="Compare Packed Qword Data for Equal" url="uops.info/html-instr/VPCMPEQQ_XMM_XMM_M128.html" url-ref="felixcloutier.com/x86/PCMPEQQ.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="u64"/>
      <architecture name="SNB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP03" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP03" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPCMPEQQ" category="AVX" cpl="3" extension="AVX" iclass="VPCMPEQQ" iform="VPCMPEQQ_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VPCMPEQQ (XMM, XMM, XMM)" summary="Compare Packed Qword Data for Equal" url="uops.info/html-instr/VPCMPEQQ_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/PCMPEQQ.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="u64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p15" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p15" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP03" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/3" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP03" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/3" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VPCMPEQW" category="AVX" cpl="3" extension="AVX" iclass="VPCMPEQW" iform="VPCMPEQW_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VPCMPEQW (XMM, XMM, M128)" summary="Compare Packed Data for Equal" url="uops.info/html-instr/VPCMPEQW_XMM_XMM_M128.html" url-ref="felixcloutier.com/x86/PCMPEQB:PCMPEQW:PCMPEQD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="u16"/>
      <architecture name="SNB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.33" TP_unrolled="0.50" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.33" TP_unrolled="0.50" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPCMPEQW" category="AVX" cpl="3" extension="AVX" iclass="VPCMPEQW" iform="VPCMPEQW_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VPCMPEQW (XMM, XMM, XMM)" summary="Compare Packed Data for Equal" url="uops.info/html-instr/VPCMPEQW_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/PCMPEQB:PCMPEQW:PCMPEQD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="u16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p15" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p15" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/3" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/3" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.25" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FP0/1/2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VPCMPESTRI" category="STTNI" cpl="3" extension="AVX" iclass="VPCMPESTRI" iform="VPCMPESTRI_XMMdq_MEMdq_IMMb" isa-set="AVX" string="VPCMPESTRI (XMM, M128, I8)" summary="Packed Compare Explicit Length Strings, Return Index" url="uops.info/html-instr/VPCMPESTRI_XMM_M128_I8.html" url-ref="felixcloutier.com/x86/PCMPESTRI.html" vex="1">
      <operand idx="1" name="REG0" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i32"/>
      <operand idx="3" name="IMM0" r="1" type="imm" width="8"/>
      <operand idx="4" name="REG1" r="1" suppressed="1" type="reg">EAX</operand>
      <operand idx="5" name="REG2" r="1" suppressed="1" type="reg">EDX</operand>
      <operand idx="6" name="REG3" suppressed="1" type="reg" w="1">ECX</operand>
      <operand flag_AF="w" flag_CF="w" flag_OF="w" flag_PF="w" flag_SF="w" flag_ZF="w" idx="7" name="REG4" suppressed="1" type="flags" w="1"/>
      <architecture name="SNB">
        <measurement TP_loop="4.00" TP_ports="3.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p05+1*p1+1*p23+2*p5" uops="8" uops_MITE="4" uops_MS="4" uops_retire_slots="8">
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="18" cycles_addr_index="18" cycles_mem="29" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="18" cycles_addr_index="18" start_op="2" target_op="7"/>
          <latency cycles="17" start_op="4" target_op="6"/>
          <latency cycles="17" start_op="4" target_op="7"/>
          <latency cycles="17" start_op="5" target_op="6"/>
          <latency cycles="17" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <measurement TP_loop="4.00" TP_ports="3.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p05+1*p1+1*p23+2*p5" uops="8" uops_MITE="4" uops_MS="4" uops_retire_slots="8">
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="18" cycles_addr_index="18" cycles_mem="28" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="18" cycles_addr_index="18" start_op="2" target_op="7"/>
          <latency cycles="17" start_op="4" target_op="6"/>
          <latency cycles="17" start_op="4" target_op="7"/>
          <latency cycles="17" start_op="5" target_op="6"/>
          <latency cycles="17" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="4.00" TP_indexed="4.00" TP_no_interiteration="4.00" TP_ports="4.00" TP_ports_indexed="4.00" fusion_occurred="1" ports="4*p0+1*p0156+1*p23+3*p5" ports_indexed="4*p0+1*p0156+1*p23+3*p5" uops="9" uops_indexed="9" version="2.2"/>
        <IACA TP="4.05" TP_indexed="4.05" TP_ports="4.00" TP_ports_indexed="4.00" fusion_occurred="1" ports="4*p0+1*p0156+1*p23+3*p5" ports_indexed="4*p0+1*p0156+1*p23+3*p5" uops="9" uops_indexed="9" version="2.3"/>
        <IACA TP="3.73" TP_ports="4.00" ports="4*p0+1*p0156+1*p23+3*p5" uops="9" version="3.0"/>
        <measurement TP_loop="4.00" TP_ports="3.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+1*p23+2*p5" uops="8" uops_MITE="4" uops_MS="4" uops_retire_slots="8">
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="17" cycles_addr_index="17" cycles_mem="26" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="17" cycles_addr_index="17" start_op="2" target_op="7"/>
          <latency cycles="15" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="15" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="4.00" TP_indexed="4.00" TP_no_interiteration="4.00" TP_ports="4.00" TP_ports_indexed="4.00" fusion_occurred="1" ports="4*p0+1*p0156+1*p23+3*p5" ports_indexed="4*p0+1*p0156+1*p23+3*p5" uops="9" uops_indexed="9" version="2.2"/>
        <IACA TP="4.05" TP_indexed="4.05" TP_ports="4.00" TP_ports_indexed="4.00" fusion_occurred="1" ports="4*p0+1*p0156+1*p23+3*p5" ports_indexed="4*p0+1*p0156+1*p23+3*p5" uops="9" uops_indexed="9" version="2.3"/>
        <IACA TP="3.72" TP_ports="4.00" ports="4*p0+1*p0156+1*p23+3*p5" uops="9" version="3.0"/>
        <measurement TP_loop="4.00" TP_ports="3.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+1*p23+2*p5" uops="8" uops_MITE="4" uops_MS="4" uops_retire_slots="8">
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="17" cycles_addr_index="17" cycles_mem="28" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="17" cycles_addr_index="17" start_op="2" target_op="7"/>
          <latency cycles="15" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="15" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="4.05" TP_indexed="4.05" TP_ports="4.00" TP_ports_indexed="4.00" fusion_occurred="1" ports="4*p0+1*p0156+1*p23+3*p5" ports_indexed="4*p0+1*p0156+1*p23+3*p5" uops="9" uops_indexed="9" version="2.3"/>
        <IACA TP="3.73" TP_ports="4.00" ports="4*p0+1*p0156+1*p23+3*p5" uops="9" version="3.0"/>
        <measurement TP_loop="4.00" TP_ports="3.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+1*p23+2*p5" uops="8" uops_MITE="4" uops_MS="4" uops_retire_slots="8">
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="16" cycles_addr_index="16" cycles_mem="25" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="16" cycles_addr_index="16" start_op="2" target_op="7"/>
          <latency cycles="15" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="15" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="4.05" TP_indexed="4.05" TP_ports="4.00" TP_ports_indexed="4.00" fusion_occurred="1" ports="4*p0+1*p0156+1*p23+3*p5" ports_indexed="4*p0+1*p0156+1*p23+3*p5" uops="9" uops_indexed="9" version="2.3"/>
        <IACA TP="3.73" TP_ports="4.00" ports="4*p0+1*p0156+1*p23+3*p5" uops="9" version="3.0"/>
        <measurement TP_loop="4.00" TP_ports="3.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+1*p23+2*p5" uops="8" uops_MITE="4" uops_MS="4" uops_retire_slots="8">
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="16" cycles_addr_index="16" cycles_mem="25" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="16" cycles_addr_index="16" start_op="2" target_op="7"/>
          <latency cycles="15" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="15" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="4.00" TP_ports="3.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+1*p23+2*p5" uops="8" uops_MITE="4" uops_MS="4" uops_retire_slots="8">
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="16" cycles_addr_index="16" cycles_mem="25" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="16" cycles_addr_index="16" start_op="2" target_op="7"/>
          <latency cycles="15" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="15" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="4.00" TP_ports="3.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+1*p23+2*p5" uops="8" uops_MITE="4" uops_MS="4" uops_retire_slots="8">
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="16" cycles_addr_index="16" cycles_mem="25" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="16" cycles_addr_index="16" start_op="2" target_op="7"/>
          <latency cycles="15" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="15" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="4.00" TP_ports="3.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p015+1*p06+1*p1+1*p23+1*p5" uops="8" uops_MITE="4" uops_MS="4" uops_retire_slots="8">
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="16" cycles_addr_index="16" cycles_mem="25" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="16" cycles_addr_index="16" start_op="2" target_op="7"/>
          <latency cycles="15" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="15" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="4.00" TP_ports="3.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+1*p23+2*p5" uops="8" uops_MITE="4" uops_MS="4" uops_retire_slots="8">
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="16" cycles_addr_index="16" cycles_mem="25" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="16" cycles_addr_index="16" start_op="2" target_op="7"/>
          <latency cycles="15" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="15" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="4.00" TP_ports="3.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p015+1*p06+1*p1+1*p23+1*p5" uops="8" uops_MITE="4" uops_MS="4" uops_retire_slots="8">
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="16" cycles_addr_index="16" cycles_mem="30" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="16" cycles_addr_index="16" start_op="2" target_op="7"/>
          <latency cycles="16" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="16" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="4.00" TP_ports="3.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p015+1*p06+1*p1+1*p23+1*p5" uops="8" uops_MITE="4" uops_MS="4" uops_retire_slots="8">
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="16" cycles_addr_index="16" cycles_mem="30" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="16" cycles_addr_index="16" start_op="2" target_op="7"/>
          <latency cycles="16" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="16" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="4.00" TP_ports="3.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p015+1*p06+1*p1+1*p23+1*p5" uops="8" uops_MITE="4" uops_MS="4" uops_retire_slots="8">
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="16" cycles_addr_index="16" cycles_mem="30" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="16" cycles_addr_index="16" start_op="2" target_op="7"/>
          <latency cycles="16" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="16" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="4.00" TP_unrolled="4.00" uops="12">
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_mem="25" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="14" cycles_addr_index="14" start_op="2" target_op="7"/>
          <latency cycles="13" start_op="4" target_op="6"/>
          <latency cycles="13" start_op="4" target_op="7"/>
          <latency cycles="12" start_op="5" target_op="6"/>
          <latency cycles="12" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="4.00" TP_unrolled="4.00" uops="12">
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_mem="23" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="14" cycles_addr_index="14" start_op="2" target_op="7"/>
          <latency cycles="13" start_op="4" target_op="6"/>
          <latency cycles="13" start_op="4" target_op="7"/>
          <latency cycles="12" start_op="5" target_op="6"/>
          <latency cycles="12" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="4.00" TP_ports="1.00" TP_unrolled="4.00" ports="1*FP0123+1*FP1+1*FP3" uops="12">
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_mem="27" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="14" cycles_addr_index="14" start_op="2" target_op="7"/>
          <latency cycles="14" start_op="4" target_op="6"/>
          <latency cycles="13" start_op="4" target_op="7"/>
          <latency cycles="12" start_op="5" target_op="6"/>
          <latency cycles="12" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPCMPESTRI" category="STTNI" cpl="3" extension="AVX" iclass="VPCMPESTRI" iform="VPCMPESTRI_XMMdq_XMMdq_IMMb" isa-set="AVX" string="VPCMPESTRI (XMM, XMM, I8)" summary="Packed Compare Explicit Length Strings, Return Index" url="uops.info/html-instr/VPCMPESTRI_XMM_XMM_I8.html" url-ref="felixcloutier.com/x86/PCMPESTRI.html" vex="1">
      <operand idx="1" name="REG0" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="IMM0" r="1" type="imm" width="8"/>
      <operand idx="4" name="REG2" r="1" suppressed="1" type="reg">EAX</operand>
      <operand idx="5" name="REG3" r="1" suppressed="1" type="reg">EDX</operand>
      <operand idx="6" name="REG4" suppressed="1" type="reg" w="1">ECX</operand>
      <operand flag_AF="w" flag_CF="w" flag_OF="w" flag_PF="w" flag_SF="w" flag_ZF="w" idx="7" name="REG5" suppressed="1" type="flags" w="1"/>
      <architecture name="SNB">
        <measurement TP_loop="4.00" TP_ports="3.50" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p05+1*p1+3*p5" uops="8" uops_MITE="4" uops_MS="4" uops_retire_slots="8">
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="17" start_op="4" target_op="6"/>
          <latency cycles="17" start_op="4" target_op="7"/>
          <latency cycles="17" start_op="5" target_op="6"/>
          <latency cycles="17" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <measurement TP_loop="4.00" TP_ports="3.50" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p05+1*p1+3*p5" uops="8" uops_MITE="4" uops_MS="4" uops_retire_slots="8">
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="17" start_op="4" target_op="6"/>
          <latency cycles="17" start_op="4" target_op="7"/>
          <latency cycles="17" start_op="5" target_op="6"/>
          <latency cycles="17" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="4.00" TP_no_interiteration="4.00" TP_ports="4.00" ports="4*p0+1*p0156+3*p5" uops="8" version="2.2"/>
        <IACA TP="4.05" TP_ports="4.00" ports="4*p0+1*p0156+3*p5" uops="8" version="2.3"/>
        <IACA TP="3.62" TP_ports="4.00" ports="4*p0+1*p0156+3*p5" uops="8" version="3.0"/>
        <measurement TP_loop="4.00" TP_ports="3.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+3*p5" uops="8" uops_MITE="4" uops_MS="4" uops_retire_slots="8">
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="15" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="15" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="4.00" TP_no_interiteration="4.00" TP_ports="4.00" ports="4*p0+1*p0156+3*p5" uops="8" version="2.2"/>
        <IACA TP="4.05" TP_ports="4.00" ports="4*p0+1*p0156+3*p5" uops="8" version="2.3"/>
        <IACA TP="3.62" TP_ports="4.00" ports="4*p0+1*p0156+3*p5" uops="8" version="3.0"/>
        <measurement TP_loop="4.00" TP_ports="3.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+3*p5" uops="8" uops_MITE="4" uops_MS="4" uops_retire_slots="8">
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="15" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="15" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="4.05" TP_ports="4.00" ports="4*p0+1*p0156+3*p5" uops="8" version="2.3"/>
        <IACA TP="3.68" TP_ports="4.00" ports="4*p0+1*p0156+3*p5" uops="8" version="3.0"/>
        <measurement TP_loop="4.00" TP_ports="3.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+3*p5" uops="8" uops_MITE="4" uops_MS="4" uops_retire_slots="8">
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="16" start_op="4" target_op="6"/>
          <latency cycles="16" start_op="4" target_op="7"/>
          <latency cycles="16" start_op="5" target_op="6"/>
          <latency cycles="16" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="4.05" TP_ports="4.00" ports="4*p0+1*p0156+3*p5" uops="8" version="2.3"/>
        <IACA TP="3.68" TP_ports="4.00" ports="4*p0+1*p0156+3*p5" uops="8" version="3.0"/>
        <measurement TP_loop="4.00" TP_ports="3.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+3*p5" uops="8" uops_MITE="4" uops_MS="4" uops_retire_slots="8">
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="16" start_op="4" target_op="6"/>
          <latency cycles="16" start_op="4" target_op="7"/>
          <latency cycles="16" start_op="5" target_op="6"/>
          <latency cycles="16" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="4.00" TP_ports="3.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+3*p5" uops="8" uops_MITE="4" uops_MS="4" uops_retire_slots="8">
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="16" start_op="4" target_op="6"/>
          <latency cycles="16" start_op="4" target_op="7"/>
          <latency cycles="16" start_op="5" target_op="6"/>
          <latency cycles="16" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="4.00" TP_ports="3.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+3*p5" uops="8" uops_MITE="4" uops_MS="4" uops_retire_slots="8">
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="16" start_op="4" target_op="6"/>
          <latency cycles="16" start_op="4" target_op="7"/>
          <latency cycles="16" start_op="5" target_op="6"/>
          <latency cycles="16" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="4.00" TP_ports="3.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+2*p015+1*p06+1*p1+1*p5" uops="8" uops_MITE="4" uops_MS="4" uops_retire_slots="8">
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="16" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="16" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="4.00" TP_ports="3.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+3*p5" uops="8" uops_MITE="4" uops_MS="4" uops_retire_slots="8">
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="16" start_op="4" target_op="6"/>
          <latency cycles="16" start_op="4" target_op="7"/>
          <latency cycles="16" start_op="5" target_op="6"/>
          <latency cycles="16" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="4.00" TP_ports="3.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+2*p015+1*p06+1*p1+1*p5" uops="8" uops_MITE="4" uops_MS="4" uops_retire_slots="8">
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="15" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="15" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="4.00" TP_ports="3.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+2*p015+1*p06+1*p1+1*p5" uops="8" uops_MITE="4" uops_MS="4" uops_retire_slots="8">
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="15" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="15" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="4.00" TP_ports="3.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+2*p015+1*p06+1*p1+1*p5" uops="8" uops_MITE="4" uops_MS="4" uops_retire_slots="8">
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="15" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="15" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="3.00" TP_unrolled="3.00" uops="6">
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="13" start_op="4" target_op="6"/>
          <latency cycles="13" start_op="4" target_op="7"/>
          <latency cycles="12" start_op="5" target_op="6"/>
          <latency cycles="12" start_op="5" target_op="7"/>
        </measurement>
        <doc uops="ucode"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="3.00" TP_unrolled="3.00" uops="6">
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="13" start_op="4" target_op="6"/>
          <latency cycles="13" start_op="4" target_op="7"/>
          <latency cycles="12" start_op="5" target_op="6"/>
          <latency cycles="12" start_op="5" target_op="7"/>
        </measurement>
        <doc uops="ucode"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="3.00" TP_ports="1.00" TP_unrolled="3.00" ports="1*FP0123+1*FP1+1*FP3" uops="8">
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="14" start_op="4" target_op="6"/>
          <latency cycles="13" start_op="4" target_op="7"/>
          <latency cycles="12" start_op="5" target_op="6"/>
          <latency cycles="12" start_op="5" target_op="7"/>
        </measurement>
        <doc uops="ucode"/>
      </architecture>
    </instruction>
    <instruction asm="VPCMPESTRIQ" category="STTNI" cpl="3" extension="AVX" iclass="VPCMPESTRI64" iform="VPCMPESTRI64_XMMdq_MEMdq_IMMb" isa-set="AVX" string="VPCMPESTRI64 (XMM, M128, I8)" url="uops.info/html-instr/VPCMPESTRI64_XMM_M128_I8.html" vex="1">
      <operand idx="1" name="REG0" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="MEM0" r="1" type="mem" width="128" xtype="i32"/>
      <operand idx="3" name="IMM0" r="1" type="imm" width="8"/>
      <operand idx="4" name="REG1" r="1" suppressed="1" type="reg">RAX</operand>
      <operand idx="5" name="REG2" r="1" suppressed="1" type="reg">RDX</operand>
      <operand idx="6" name="REG3" suppressed="1" type="reg" w="1">RCX</operand>
      <operand flag_AF="w" flag_CF="w" flag_OF="w" flag_PF="w" flag_SF="w" flag_ZF="w" idx="7" name="REG4" suppressed="1" type="flags" w="1"/>
      <architecture name="SNB">
        <measurement TP_loop="50.00" TP_ports="12.00" TP_unrolled="50.00" available_simple_decoders="0" complex_decoder="1" ports="12*p0+7*p1+2*p15+12*p5" uops="32" uops_MITE="0" uops_MS="32" uops_retire_slots="32">
          <latency cycles="50" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="50" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="49" cycles_addr_index="49" cycles_mem="50" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="50" cycles_addr_index="50" start_op="2" target_op="7"/>
          <latency cycles="49" start_op="4" target_op="6"/>
          <latency cycles="50" start_op="4" target_op="7"/>
          <latency cycles="49" start_op="5" target_op="6"/>
          <latency cycles="50" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <measurement TP_loop="46.00" TP_ports="13.00" TP_unrolled="46.00" available_simple_decoders="0" complex_decoder="1" ports="11*p0+8*p1+1*p23+13*p5" uops="33" uops_MITE="0" uops_MS="33" uops_retire_slots="33">
          <latency cycles="46" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="46" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="47" cycles_addr_index="47" cycles_mem="47" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="46" cycles_addr_index="46" start_op="2" target_op="7"/>
          <latency cycles="47" start_op="4" target_op="6"/>
          <latency cycles="47" start_op="4" target_op="7"/>
          <latency cycles="47" start_op="5" target_op="6"/>
          <latency cycles="47" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="4.00" TP_indexed="4.00" TP_no_interiteration="4.00" TP_ports="4.00" TP_ports_indexed="4.00" fusion_occurred="1" ports="4*p0+1*p0156+1*p23+3*p5" ports_indexed="4*p0+1*p0156+1*p23+3*p5" uops="9" uops_indexed="9" version="2.2"/>
        <IACA TP="4.05" TP_indexed="4.05" TP_ports="4.00" TP_ports_indexed="4.00" fusion_occurred="1" ports="4*p0+1*p0156+1*p23+3*p5" ports_indexed="4*p0+1*p0156+1*p23+3*p5" uops="9" uops_indexed="9" version="2.3"/>
        <IACA TP="3.73" TP_ports="4.00" ports="4*p0+1*p0156+1*p23+3*p5" uops="9" version="3.0"/>
        <measurement TP_loop="4.00" TP_ports="3.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+1*p23+2*p5" uops="8" uops_MITE="4" uops_MS="4" uops_retire_slots="8">
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="17" cycles_addr_index="17" cycles_mem="26" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="17" cycles_addr_index="17" start_op="2" target_op="7"/>
          <latency cycles="15" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="15" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="4.00" TP_indexed="4.00" TP_no_interiteration="4.00" TP_ports="4.00" TP_ports_indexed="4.00" fusion_occurred="1" ports="4*p0+1*p0156+1*p23+3*p5" ports_indexed="4*p0+1*p0156+1*p23+3*p5" uops="9" uops_indexed="9" version="2.2"/>
        <IACA TP="4.05" TP_indexed="4.05" TP_ports="4.00" TP_ports_indexed="4.00" fusion_occurred="1" ports="4*p0+1*p0156+1*p23+3*p5" ports_indexed="4*p0+1*p0156+1*p23+3*p5" uops="9" uops_indexed="9" version="2.3"/>
        <IACA TP="3.72" TP_ports="4.00" ports="4*p0+1*p0156+1*p23+3*p5" uops="9" version="3.0"/>
        <measurement TP_loop="4.00" TP_ports="3.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+1*p23+2*p5" uops="8" uops_MITE="4" uops_MS="4" uops_retire_slots="8">
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="17" cycles_addr_index="17" cycles_mem="28" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="17" cycles_addr_index="17" start_op="2" target_op="7"/>
          <latency cycles="15" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="15" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="4.05" TP_indexed="4.05" TP_ports="4.00" TP_ports_indexed="4.00" fusion_occurred="1" ports="4*p0+1*p0156+1*p23+3*p5" ports_indexed="4*p0+1*p0156+1*p23+3*p5" uops="9" uops_indexed="9" version="2.3"/>
        <IACA TP="3.73" TP_ports="4.00" ports="4*p0+1*p0156+1*p23+3*p5" uops="9" version="3.0"/>
        <measurement TP_loop="4.00" TP_ports="3.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+1*p23+2*p5" uops="8" uops_MITE="4" uops_MS="4" uops_retire_slots="8">
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="16" cycles_addr_index="16" cycles_mem="25" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="16" cycles_addr_index="16" start_op="2" target_op="7"/>
          <latency cycles="15" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="15" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="4.05" TP_indexed="4.05" TP_ports="4.00" TP_ports_indexed="4.00" fusion_occurred="1" ports="4*p0+1*p0156+1*p23+3*p5" ports_indexed="4*p0+1*p0156+1*p23+3*p5" uops="9" uops_indexed="9" version="2.3"/>
        <IACA TP="3.73" TP_ports="4.00" ports="4*p0+1*p0156+1*p23+3*p5" uops="9" version="3.0"/>
        <measurement TP_loop="4.00" TP_ports="3.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+1*p23+2*p5" uops="8" uops_MITE="4" uops_MS="4" uops_retire_slots="8">
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="16" cycles_addr_index="16" cycles_mem="23" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="16" cycles_addr_index="16" start_op="2" target_op="7"/>
          <latency cycles="15" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="15" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="4.00" TP_ports="3.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+1*p23+2*p5" uops="8" uops_MITE="4" uops_MS="4" uops_retire_slots="8">
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="16" cycles_addr_index="16" cycles_mem="25" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="16" cycles_addr_index="16" start_op="2" target_op="7"/>
          <latency cycles="15" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="15" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="4.00" TP_ports="3.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+1*p23+2*p5" uops="8" uops_MITE="4" uops_MS="4" uops_retire_slots="8">
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="16" cycles_addr_index="16" cycles_mem="25" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="16" cycles_addr_index="16" start_op="2" target_op="7"/>
          <latency cycles="15" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="15" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="4.00" TP_ports="3.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p015+1*p06+1*p1+1*p23+1*p5" uops="8" uops_MITE="4" uops_MS="4" uops_retire_slots="8">
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="16" cycles_addr_index="16" cycles_mem="29" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="16" cycles_addr_index="16" start_op="2" target_op="7"/>
          <latency cycles="15" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="15" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="4.00" TP_ports="3.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+1*p23+2*p5" uops="8" uops_MITE="4" uops_MS="4" uops_retire_slots="8">
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="16" cycles_addr_index="16" cycles_mem="25" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="16" cycles_addr_index="16" start_op="2" target_op="7"/>
          <latency cycles="15" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="15" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="4.00" TP_ports="3.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p015+1*p06+1*p1+1*p23+1*p5" uops="8" uops_MITE="4" uops_MS="4" uops_retire_slots="8">
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="16" cycles_addr_index="16" cycles_mem="30" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="16" cycles_addr_index="16" start_op="2" target_op="7"/>
          <latency cycles="16" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="16" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="4.00" TP_ports="3.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p015+1*p06+1*p1+1*p23+1*p5" uops="8" uops_MITE="4" uops_MS="4" uops_retire_slots="8">
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="16" cycles_addr_index="16" cycles_mem="30" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="16" cycles_addr_index="16" start_op="2" target_op="7"/>
          <latency cycles="16" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="16" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="4.00" TP_ports="3.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p015+1*p06+1*p1+1*p23+1*p5" uops="8" uops_MITE="4" uops_MS="4" uops_retire_slots="8">
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="16" cycles_addr_index="16" cycles_mem="30" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="16" cycles_addr_index="16" start_op="2" target_op="7"/>
          <latency cycles="16" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="16" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="7.00" TP_unrolled="7.00" uops="10">
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_mem="26" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="14" cycles_addr_index="14" start_op="2" target_op="7"/>
          <latency cycles="13" start_op="4" target_op="6"/>
          <latency cycles="13" start_op="4" target_op="7"/>
          <latency cycles="12" start_op="5" target_op="6"/>
          <latency cycles="12" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="4.00" TP_unrolled="4.00" uops="12">
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_mem="23" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="14" cycles_addr_index="14" start_op="2" target_op="7"/>
          <latency cycles="13" start_op="4" target_op="6"/>
          <latency cycles="13" start_op="4" target_op="7"/>
          <latency cycles="12" start_op="5" target_op="6"/>
          <latency cycles="12" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="4.00" TP_ports="1.00" TP_unrolled="4.00" ports="1*FP0123+1*FP1+1*FP3" uops="12">
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_mem="27" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="14" cycles_addr_index="14" start_op="2" target_op="7"/>
          <latency cycles="13" start_op="4" target_op="6"/>
          <latency cycles="13" start_op="4" target_op="7"/>
          <latency cycles="13" start_op="5" target_op="6"/>
          <latency cycles="12" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPCMPESTRIQ" category="STTNI" cpl="3" extension="AVX" iclass="VPCMPESTRI64" iform="VPCMPESTRI64_XMMdq_XMMdq_IMMb" isa-set="AVX" string="VPCMPESTRI64 (XMM, XMM, I8)" url="uops.info/html-instr/VPCMPESTRI64_XMM_XMM_I8.html" vex="1">
      <operand idx="1" name="REG0" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="IMM0" r="1" type="imm" width="8"/>
      <operand idx="4" name="REG2" r="1" suppressed="1" type="reg">RAX</operand>
      <operand idx="5" name="REG3" r="1" suppressed="1" type="reg">RDX</operand>
      <operand idx="6" name="REG4" suppressed="1" type="reg" w="1">RCX</operand>
      <operand flag_AF="w" flag_CF="w" flag_OF="w" flag_PF="w" flag_SF="w" flag_ZF="w" idx="7" name="REG5" suppressed="1" type="flags" w="1"/>
      <architecture name="SNB">
        <measurement TP_loop="93.31" TP_ports="19.00" TP_unrolled="93.25" available_simple_decoders="0" complex_decoder="1" ports="17*p0+10*p1+1*p15+19*p5" uops="33" uops_MITE="0" uops_MS="50" uops_retire_slots="33">
          <latency cycles="94" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="94" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="93" cycles_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles="93" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="94" start_op="4" target_op="6"/>
          <latency cycles="94" start_op="4" target_op="7"/>
          <latency cycles="94" start_op="5" target_op="6"/>
          <latency cycles="93" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <measurement TP_loop="92.40" TP_ports="21.00" TP_unrolled="92.25" available_simple_decoders="0" complex_decoder="1" ports="15*p0+12*p1+1*p23+21*p5" uops="49" uops_MITE="0" uops_MS="51" uops_retire_slots="33">
          <latency cycles="93" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="92" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="92" cycles_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles="92" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="93" start_op="4" target_op="6"/>
          <latency cycles="93" start_op="4" target_op="7"/>
          <latency cycles="93" start_op="5" target_op="6"/>
          <latency cycles="93" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="4.00" TP_no_interiteration="4.00" TP_ports="4.00" ports="4*p0+1*p0156+3*p5" uops="8" version="2.2"/>
        <IACA TP="4.05" TP_ports="4.00" ports="4*p0+1*p0156+3*p5" uops="8" version="2.3"/>
        <IACA TP="3.62" TP_ports="4.00" ports="4*p0+1*p0156+3*p5" uops="8" version="3.0"/>
        <measurement TP_loop="4.00" TP_ports="3.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+3*p5" uops="8" uops_MITE="4" uops_MS="4" uops_retire_slots="8">
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="15" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="15" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="4.00" TP_no_interiteration="4.00" TP_ports="4.00" ports="4*p0+1*p0156+3*p5" uops="8" version="2.2"/>
        <IACA TP="4.05" TP_ports="4.00" ports="4*p0+1*p0156+3*p5" uops="8" version="2.3"/>
        <IACA TP="3.62" TP_ports="4.00" ports="4*p0+1*p0156+3*p5" uops="8" version="3.0"/>
        <measurement TP_loop="4.00" TP_ports="3.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+3*p5" uops="8" uops_MITE="4" uops_MS="4" uops_retire_slots="8">
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="15" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="15" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="4.05" TP_ports="4.00" ports="4*p0+1*p0156+3*p5" uops="8" version="2.3"/>
        <IACA TP="3.68" TP_ports="4.00" ports="4*p0+1*p0156+3*p5" uops="8" version="3.0"/>
        <measurement TP_loop="4.00" TP_ports="3.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+3*p5" uops="8" uops_MITE="4" uops_MS="4" uops_retire_slots="8">
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="16" start_op="4" target_op="6"/>
          <latency cycles="16" start_op="4" target_op="7"/>
          <latency cycles="16" start_op="5" target_op="6"/>
          <latency cycles="16" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="4.05" TP_ports="4.00" ports="4*p0+1*p0156+3*p5" uops="8" version="2.3"/>
        <IACA TP="3.68" TP_ports="4.00" ports="4*p0+1*p0156+3*p5" uops="8" version="3.0"/>
        <measurement TP_loop="4.00" TP_ports="3.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+3*p5" uops="8" uops_MITE="4" uops_MS="4" uops_retire_slots="8">
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="16" start_op="4" target_op="6"/>
          <latency cycles="16" start_op="4" target_op="7"/>
          <latency cycles="16" start_op="5" target_op="6"/>
          <latency cycles="16" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="4.00" TP_ports="3.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+3*p5" uops="8" uops_MITE="4" uops_MS="4" uops_retire_slots="8">
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="16" start_op="4" target_op="6"/>
          <latency cycles="16" start_op="4" target_op="7"/>
          <latency cycles="16" start_op="5" target_op="6"/>
          <latency cycles="16" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="4.00" TP_ports="3.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+3*p5" uops="8" uops_MITE="4" uops_MS="4" uops_retire_slots="8">
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="16" start_op="4" target_op="6"/>
          <latency cycles="16" start_op="4" target_op="7"/>
          <latency cycles="16" start_op="5" target_op="6"/>
          <latency cycles="16" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="4.00" TP_ports="3.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+2*p015+1*p06+1*p1+1*p5" uops="8" uops_MITE="4" uops_MS="4" uops_retire_slots="8">
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="16" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="16" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="4.00" TP_ports="3.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+3*p5" uops="8" uops_MITE="4" uops_MS="4" uops_retire_slots="8">
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="16" start_op="4" target_op="6"/>
          <latency cycles="16" start_op="4" target_op="7"/>
          <latency cycles="16" start_op="5" target_op="6"/>
          <latency cycles="16" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="4.00" TP_ports="3.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+2*p015+1*p06+1*p1+1*p5" uops="8" uops_MITE="4" uops_MS="4" uops_retire_slots="8">
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="14" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="16" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="4.00" TP_ports="3.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+2*p015+1*p06+1*p1+1*p5" uops="8" uops_MITE="4" uops_MS="4" uops_retire_slots="8">
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="15" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="15" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="4.00" TP_ports="3.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+2*p015+1*p06+1*p1+1*p5" uops="8" uops_MITE="4" uops_MS="4" uops_retire_slots="8">
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="15" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="15" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="6.00" TP_unrolled="6.00" uops="8">
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="13" start_op="4" target_op="6"/>
          <latency cycles="13" start_op="4" target_op="7"/>
          <latency cycles="12" start_op="5" target_op="6"/>
          <latency cycles="12" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="3.00" TP_unrolled="3.00" uops="6">
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="13" start_op="4" target_op="6"/>
          <latency cycles="13" start_op="4" target_op="7"/>
          <latency cycles="12" start_op="5" target_op="6"/>
          <latency cycles="12" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="3.00" TP_ports="1.00" TP_unrolled="3.00" ports="1*FP0123+1*FP1+1*FP3" uops="8">
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="6"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="14" start_op="4" target_op="6"/>
          <latency cycles="13" start_op="4" target_op="7"/>
          <latency cycles="12" start_op="5" target_op="6"/>
          <latency cycles="12" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPCMPESTRM" category="STTNI" cpl="3" extension="AVX" iclass="VPCMPESTRM" iform="VPCMPESTRM_XMMdq_MEMdq_IMMb" isa-set="AVX" string="VPCMPESTRM (XMM, M128, I8)" summary="Packed Compare Explicit Length Strings, Return Mask" url="uops.info/html-instr/VPCMPESTRM_XMM_M128_I8.html" url-ref="felixcloutier.com/x86/PCMPESTRM.html" vex="1">
      <operand idx="1" name="REG0" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i32"/>
      <operand idx="3" name="IMM0" r="1" type="imm" width="8"/>
      <operand idx="4" name="REG1" r="1" suppressed="1" type="reg">EAX</operand>
      <operand idx="5" name="REG2" r="1" suppressed="1" type="reg">EDX</operand>
      <operand idx="6" name="REG3" suppressed="1" type="reg" w="1" width="128" xtype="i32">XMM0</operand>
      <operand flag_AF="w" flag_CF="w" flag_OF="w" flag_PF="w" flag_SF="w" flag_ZF="w" idx="7" name="REG4" suppressed="1" type="flags" w="1"/>
      <architecture name="SNB">
        <measurement TP_loop="4.00" TP_ports="3.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p05+1*p1+1*p23+2*p5" uops="8" uops_MITE="4" uops_MS="4" uops_retire_slots="8">
          <latency cycles="10" start_op="1" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="17" cycles_addr_index="17" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="16" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="18" cycles_addr_index="18" start_op="2" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="17" start_op="4" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="17" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <measurement TP_loop="4.00" TP_ports="3.00" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p05+1*p1+1*p23+2*p5" uops="8" uops_MITE="4" uops_MS="4" uops_retire_slots="8">
          <latency cycles="10" start_op="1" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="17" cycles_addr_index="17" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="16" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="18" cycles_addr_index="18" start_op="2" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="17" start_op="4" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="17" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="4.00" TP_indexed="4.00" TP_no_interiteration="4.00" TP_ports="4.00" TP_ports_indexed="4.00" fusion_occurred="1" ports="4*p0+1*p015+1*p0156+1*p23+3*p5" ports_indexed="4*p0+1*p015+1*p0156+1*p23+3*p5" uops="10" uops_indexed="10" version="2.2"/>
        <IACA TP="4.05" TP_indexed="4.05" TP_ports="4.00" TP_ports_indexed="4.00" fusion_occurred="1" ports="4*p0+1*p015+1*p0156+1*p23+3*p5" ports_indexed="4*p0+1*p015+1*p0156+1*p23+3*p5" uops="10" uops_indexed="10" version="2.3"/>
        <IACA TP="3.66" TP_ports="4.00" ports="4*p0+1*p015+1*p0156+1*p23+3*p5" uops="10" version="3.0"/>
        <measurement TP_loop="5.00" TP_ports="3.00" TP_unrolled="5.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+1*p23+3*p5" uops="9" uops_MITE="4" uops_MS="5" uops_retire_slots="9">
          <latency cycles="11" start_op="1" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="17" cycles_addr_index="17" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="16" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="17" cycles_addr_index="17" start_op="2" target_op="7"/>
          <latency cycles="15" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="15" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="4.00" TP_indexed="4.00" TP_no_interiteration="4.00" TP_ports="4.00" TP_ports_indexed="4.00" fusion_occurred="1" ports="4*p0+1*p015+1*p0156+1*p23+3*p5" ports_indexed="4*p0+1*p015+1*p0156+1*p23+3*p5" uops="10" uops_indexed="10" version="2.2"/>
        <IACA TP="4.05" TP_indexed="4.05" TP_ports="4.00" TP_ports_indexed="4.00" fusion_occurred="1" ports="4*p0+1*p015+1*p0156+1*p23+3*p5" ports_indexed="4*p0+1*p015+1*p0156+1*p23+3*p5" uops="10" uops_indexed="10" version="2.3"/>
        <IACA TP="3.66" TP_ports="4.00" ports="4*p0+1*p015+1*p0156+1*p23+3*p5" uops="10" version="3.0"/>
        <measurement TP_loop="5.00" TP_ports="3.00" TP_unrolled="5.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+1*p23+3*p5" uops="9" uops_MITE="4" uops_MS="5" uops_retire_slots="9">
          <latency cycles="11" start_op="1" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="17" cycles_addr_index="17" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="16" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="17" cycles_addr_index="17" start_op="2" target_op="7"/>
          <latency cycles="15" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="15" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="4.05" TP_indexed="4.05" TP_ports="4.00" TP_ports_indexed="4.00" fusion_occurred="1" ports="4*p0+1*p015+1*p0156+1*p23+3*p5" ports_indexed="4*p0+1*p015+1*p0156+1*p23+3*p5" uops="10" uops_indexed="10" version="2.3"/>
        <IACA TP="3.69" TP_ports="4.00" ports="4*p0+1*p015+1*p0156+1*p23+3*p5" uops="10" version="3.0"/>
        <measurement TP_loop="5.00" TP_ports="3.00" TP_unrolled="5.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+1*p23+3*p5" uops="9" uops_MITE="4" uops_MS="5" uops_retire_slots="9">
          <latency cycles="9" start_op="1" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="16" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="16" cycles_addr_index="16" start_op="2" target_op="7"/>
          <latency cycles="15" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="15" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="4.05" TP_indexed="4.05" TP_ports="4.00" TP_ports_indexed="4.00" fusion_occurred="1" ports="4*p0+1*p015+1*p0156+1*p23+3*p5" ports_indexed="4*p0+1*p015+1*p0156+1*p23+3*p5" uops="10" uops_indexed="10" version="2.3"/>
        <IACA TP="3.69" TP_ports="4.00" ports="4*p0+1*p015+1*p0156+1*p23+3*p5" uops="10" version="3.0"/>
        <measurement TP_loop="5.00" TP_ports="3.00" TP_unrolled="5.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+1*p23+3*p5" uops="9" uops_MITE="4" uops_MS="5" uops_retire_slots="9">
          <latency cycles="9" start_op="1" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="16" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="16" cycles_addr_index="16" start_op="2" target_op="7"/>
          <latency cycles="15" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="15" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="5.00" TP_ports="3.00" TP_unrolled="5.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+1*p23+3*p5" uops="9" uops_MITE="4" uops_MS="5" uops_retire_slots="9">
          <latency cycles="9" start_op="1" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="16" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="16" cycles_addr_index="16" start_op="2" target_op="7"/>
          <latency cycles="15" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="15" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="5.00" TP_ports="3.00" TP_unrolled="5.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+1*p23+3*p5" uops="9" uops_MITE="4" uops_MS="5" uops_retire_slots="9">
          <latency cycles="9" start_op="1" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="16" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="16" cycles_addr_index="16" start_op="2" target_op="7"/>
          <latency cycles="15" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="15" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="5.00" TP_ports="3.00" TP_unrolled="5.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+2*p015+1*p06+1*p1+1*p23+1*p5" uops="9" uops_MITE="4" uops_MS="5" uops_retire_slots="9">
          <latency cycles="11" start_op="1" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="17" cycles_addr_index="17" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="16" cycles_addr_index="16" start_op="2" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="16" start_op="4" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="16" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="5.00" TP_ports="3.00" TP_unrolled="5.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+1*p23+3*p5" uops="9" uops_MITE="4" uops_MS="5" uops_retire_slots="9">
          <latency cycles="9" start_op="1" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="16" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="16" cycles_addr_index="16" start_op="2" target_op="7"/>
          <latency cycles="15" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="15" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="5.00" TP_ports="3.00" TP_unrolled="5.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+2*p015+1*p06+1*p1+1*p23+1*p5" uops="9" uops_MITE="4" uops_MS="5" uops_retire_slots="9">
          <latency cycles="11" start_op="1" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="17" cycles_addr_index="17" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="17" cycles_addr_index="17" start_op="2" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="5.00" TP_ports="3.00" TP_unrolled="5.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+2*p015+1*p06+1*p1+1*p23+1*p5" uops="9" uops_MITE="4" uops_MS="5" uops_retire_slots="9">
          <latency cycles="11" start_op="1" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="17" cycles_addr_index="17" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="17" cycles_addr_index="17" start_op="2" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="5.00" TP_ports="3.00" TP_unrolled="5.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+2*p015+1*p06+1*p1+1*p23+1*p5" uops="9" uops_MITE="4" uops_MS="5" uops_retire_slots="9">
          <latency cycles="11" start_op="1" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="17" cycles_addr_index="17" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="17" cycles_addr_index="17" start_op="2" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="4.00" TP_unrolled="4.00" uops="12">
          <latency cycles="7" start_op="1" target_op="6"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="15" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="14" cycles_addr_index="14" start_op="2" target_op="7"/>
          <latency cycles="14" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="13" start_op="4" target_op="7"/>
          <latency cycles="13" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="12" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="4.00" TP_unrolled="4.00" uops="12">
          <latency cycles="7" start_op="1" target_op="6"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="15" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="14" cycles_addr_index="14" start_op="2" target_op="7"/>
          <latency cycles="14" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="13" start_op="4" target_op="7"/>
          <latency cycles="13" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="12" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="4.00" TP_ports="1.00" TP_unrolled="4.00" ports="2*FP0123+1*FP1+1*FP3" uops="12">
          <latency cycles="6" start_op="1" target_op="6"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="14" cycles_addr_index="14" start_op="2" target_op="7"/>
          <latency cycles="14" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="13" start_op="4" target_op="7"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="12" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPCMPESTRM" category="STTNI" cpl="3" extension="AVX" iclass="VPCMPESTRM" iform="VPCMPESTRM_XMMdq_XMMdq_IMMb" isa-set="AVX" string="VPCMPESTRM (XMM, XMM, I8)" summary="Packed Compare Explicit Length Strings, Return Mask" url="uops.info/html-instr/VPCMPESTRM_XMM_XMM_I8.html" url-ref="felixcloutier.com/x86/PCMPESTRM.html" vex="1">
      <operand idx="1" name="REG0" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="IMM0" r="1" type="imm" width="8"/>
      <operand idx="4" name="REG2" r="1" suppressed="1" type="reg">EAX</operand>
      <operand idx="5" name="REG3" r="1" suppressed="1" type="reg">EDX</operand>
      <operand idx="6" name="REG4" suppressed="1" type="reg" w="1" width="128" xtype="i32">XMM0</operand>
      <operand flag_AF="w" flag_CF="w" flag_OF="w" flag_PF="w" flag_SF="w" flag_ZF="w" idx="7" name="REG5" suppressed="1" type="flags" w="1"/>
      <architecture name="SNB">
        <measurement TP_loop="4.00" TP_ports="3.50" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p05+1*p1+3*p5" uops="8" uops_MITE="4" uops_MS="4" uops_retire_slots="8">
          <latency cycles="10" start_op="1" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="11" start_op="2" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="17" start_op="4" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="17" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <measurement TP_loop="4.00" TP_ports="3.50" TP_unrolled="4.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p05+1*p1+3*p5" uops="8" uops_MITE="4" uops_MS="4" uops_retire_slots="8">
          <latency cycles="10" start_op="1" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="11" start_op="2" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="17" start_op="4" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="17" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="4.00" TP_no_interiteration="4.00" TP_ports="4.00" ports="4*p0+1*p015+1*p0156+3*p5" uops="9" version="2.2"/>
        <IACA TP="4.05" TP_ports="4.00" ports="4*p0+1*p015+1*p0156+3*p5" uops="9" version="2.3"/>
        <IACA TP="3.57" TP_ports="4.00" ports="4*p0+1*p015+1*p0156+3*p5" uops="9" version="3.0"/>
        <measurement TP_loop="5.00" TP_ports="4.00" TP_unrolled="5.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+4*p5" uops="9" uops_MITE="4" uops_MS="5" uops_retire_slots="9">
          <latency cycles="11" start_op="1" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="11" start_op="2" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="15" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="15" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="4.00" TP_no_interiteration="4.00" TP_ports="4.00" ports="4*p0+1*p015+1*p0156+3*p5" uops="9" version="2.2"/>
        <IACA TP="4.05" TP_ports="4.00" ports="4*p0+1*p015+1*p0156+3*p5" uops="9" version="2.3"/>
        <IACA TP="3.55" TP_ports="4.00" ports="4*p0+1*p015+1*p0156+3*p5" uops="9" version="3.0"/>
        <measurement TP_loop="5.00" TP_ports="4.00" TP_unrolled="5.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+4*p5" uops="9" uops_MITE="4" uops_MS="5" uops_retire_slots="9">
          <latency cycles="11" start_op="1" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="11" start_op="2" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="15" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="15" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="4.05" TP_ports="4.00" ports="4*p0+1*p015+1*p0156+3*p5" uops="9" version="2.3"/>
        <IACA TP="3.47" TP_ports="4.00" ports="4*p0+1*p015+1*p0156+3*p5" uops="9" version="3.0"/>
        <measurement TP_loop="5.00" TP_ports="4.00" TP_unrolled="5.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+4*p5" uops="9" uops_MITE="4" uops_MS="5" uops_retire_slots="9">
          <latency cycles="10" start_op="1" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="9" start_op="2" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="16" start_op="4" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="16" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="4.05" TP_ports="4.00" ports="4*p0+1*p015+1*p0156+3*p5" uops="9" version="2.3"/>
        <IACA TP="3.47" TP_ports="4.00" ports="4*p0+1*p015+1*p0156+3*p5" uops="9" version="3.0"/>
        <measurement TP_loop="5.00" TP_ports="4.00" TP_unrolled="5.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+4*p5" uops="9" uops_MITE="4" uops_MS="5" uops_retire_slots="9">
          <latency cycles="10" start_op="1" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="9" start_op="2" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="16" start_op="4" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="16" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="5.00" TP_ports="4.00" TP_unrolled="5.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+4*p5" uops="9" uops_MITE="4" uops_MS="5" uops_retire_slots="9">
          <latency cycles="10" start_op="1" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="9" start_op="2" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="16" start_op="4" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="16" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="5.00" TP_ports="4.00" TP_unrolled="5.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+4*p5" uops="9" uops_MITE="4" uops_MS="5" uops_retire_slots="9">
          <latency cycles="10" start_op="1" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="9" start_op="2" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="16" start_op="4" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="16" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="5.00" TP_ports="3.00" TP_unrolled="5.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+3*p015+1*p06+1*p1+1*p5" uops="9" uops_MITE="4" uops_MS="5" uops_retire_slots="9">
          <latency cycles="11" start_op="1" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="11" start_op="2" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="16" start_op="4" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="16" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="5.00" TP_ports="4.00" TP_unrolled="5.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+4*p5" uops="9" uops_MITE="4" uops_MS="5" uops_retire_slots="9">
          <latency cycles="10" start_op="1" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="9" start_op="2" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="16" start_op="4" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="16" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="5.00" TP_ports="3.00" TP_unrolled="5.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+3*p015+1*p06+1*p1+1*p5" uops="9" uops_MITE="4" uops_MS="5" uops_retire_slots="9">
          <latency cycles="11" start_op="1" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="11" start_op="2" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="16" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="5.00" TP_ports="3.00" TP_unrolled="5.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+3*p015+1*p06+1*p1+1*p5" uops="9" uops_MITE="4" uops_MS="5" uops_retire_slots="9">
          <latency cycles="11" start_op="1" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="11" start_op="2" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="16" start_op="4" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="16" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="5.00" TP_ports="3.00" TP_unrolled="5.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+3*p015+1*p06+1*p1+1*p5" uops="9" uops_MITE="4" uops_MS="5" uops_retire_slots="9">
          <latency cycles="11" start_op="1" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="11" start_op="2" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="16" start_op="4" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="16" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="3.00" TP_unrolled="3.00" uops="7">
          <latency cycles="7" start_op="1" target_op="6"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="7" start_op="2" target_op="6"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="14" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="13" start_op="4" target_op="7"/>
          <latency cycles="13" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="12" start_op="5" target_op="7"/>
        </measurement>
        <doc uops="ucode"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="3.00" TP_unrolled="3.00" uops="7">
          <latency cycles="7" start_op="1" target_op="6"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="7" start_op="2" target_op="6"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="14" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="13" start_op="4" target_op="7"/>
          <latency cycles="13" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="12" start_op="5" target_op="7"/>
        </measurement>
        <doc uops="ucode"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="3.00" TP_ports="1.00" TP_unrolled="3.00" ports="3*FP0123+1*FP1" uops="7">
          <latency cycles="6" start_op="1" target_op="6"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="6" start_op="2" target_op="6"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="13" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="13" start_op="4" target_op="7"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="12" start_op="5" target_op="7"/>
        </measurement>
        <doc uops="ucode"/>
      </architecture>
    </instruction>
    <instruction asm="VPCMPESTRMQ" category="STTNI" cpl="3" extension="AVX" iclass="VPCMPESTRM64" iform="VPCMPESTRM64_XMMdq_MEMdq_IMMb" isa-set="AVX" string="VPCMPESTRM64 (XMM, M128, I8)" url="uops.info/html-instr/VPCMPESTRM64_XMM_M128_I8.html" vex="1">
      <operand idx="1" name="REG0" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="MEM0" r="1" type="mem" width="128" xtype="i32"/>
      <operand idx="3" name="IMM0" r="1" type="imm" width="8"/>
      <operand idx="4" name="REG1" r="1" suppressed="1" type="reg">RAX</operand>
      <operand idx="5" name="REG2" r="1" suppressed="1" type="reg">RDX</operand>
      <operand idx="6" name="REG3" suppressed="1" type="reg" w="1" width="128" xtype="i32">XMM0</operand>
      <operand flag_AF="w" flag_CF="w" flag_OF="w" flag_PF="w" flag_SF="w" flag_ZF="w" idx="7" name="REG4" suppressed="1" type="flags" w="1"/>
      <architecture name="SNB">
        <measurement TP_loop="50.00" TP_ports="12.00" TP_unrolled="50.00" available_simple_decoders="0" complex_decoder="1" ports="12*p0+7*p1+2*p15+12*p5" uops="32" uops_MITE="0" uops_MS="32" uops_retire_slots="32">
          <latency cycles="50" start_op="1" target_op="6"/>
          <latency cycles="50" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="50" cycles_addr_index="50" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="50" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="50" cycles_addr_index="50" start_op="2" target_op="7"/>
          <latency cycles="50" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="50" start_op="4" target_op="7"/>
          <latency cycles="50" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="50" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <measurement TP_loop="46.00" TP_ports="13.00" TP_unrolled="46.00" available_simple_decoders="0" complex_decoder="1" ports="11*p0+8*p1+1*p23+13*p5" uops="33" uops_MITE="0" uops_MS="33" uops_retire_slots="33">
          <latency cycles="46" start_op="1" target_op="6"/>
          <latency cycles="46" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="47" cycles_addr_index="48" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="46" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="46" cycles_addr_index="46" start_op="2" target_op="7"/>
          <latency cycles="46" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="47" start_op="4" target_op="7"/>
          <latency cycles="46" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="47" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="4.00" TP_indexed="4.00" TP_no_interiteration="4.00" TP_ports="4.00" TP_ports_indexed="4.00" fusion_occurred="1" ports="4*p0+1*p015+1*p0156+1*p23+3*p5" ports_indexed="4*p0+1*p015+1*p0156+1*p23+3*p5" uops="10" uops_indexed="10" version="2.2"/>
        <IACA TP="4.05" TP_indexed="4.05" TP_ports="4.00" TP_ports_indexed="4.00" fusion_occurred="1" ports="4*p0+1*p015+1*p0156+1*p23+3*p5" ports_indexed="4*p0+1*p015+1*p0156+1*p23+3*p5" uops="10" uops_indexed="10" version="2.3"/>
        <IACA TP="3.66" TP_ports="4.00" ports="4*p0+1*p015+1*p0156+1*p23+3*p5" uops="10" version="3.0"/>
        <measurement TP_loop="5.00" TP_ports="3.00" TP_unrolled="5.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+1*p23+3*p5" uops="9" uops_MITE="4" uops_MS="5" uops_retire_slots="9">
          <latency cycles="11" start_op="1" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="17" cycles_addr_index="17" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="16" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="17" cycles_addr_index="17" start_op="2" target_op="7"/>
          <latency cycles="15" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="15" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="4.00" TP_indexed="4.00" TP_no_interiteration="4.00" TP_ports="4.00" TP_ports_indexed="4.00" fusion_occurred="1" ports="4*p0+1*p015+1*p0156+1*p23+3*p5" ports_indexed="4*p0+1*p015+1*p0156+1*p23+3*p5" uops="10" uops_indexed="10" version="2.2"/>
        <IACA TP="4.05" TP_indexed="4.05" TP_ports="4.00" TP_ports_indexed="4.00" fusion_occurred="1" ports="4*p0+1*p015+1*p0156+1*p23+3*p5" ports_indexed="4*p0+1*p015+1*p0156+1*p23+3*p5" uops="10" uops_indexed="10" version="2.3"/>
        <IACA TP="3.66" TP_ports="4.00" ports="4*p0+1*p015+1*p0156+1*p23+3*p5" uops="10" version="3.0"/>
        <measurement TP_loop="5.00" TP_ports="3.00" TP_unrolled="5.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+1*p23+3*p5" uops="9" uops_MITE="4" uops_MS="5" uops_retire_slots="9">
          <latency cycles="11" start_op="1" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="17" cycles_addr_index="17" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="16" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="17" cycles_addr_index="17" start_op="2" target_op="7"/>
          <latency cycles="15" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="15" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="4.05" TP_indexed="4.05" TP_ports="4.00" TP_ports_indexed="4.00" fusion_occurred="1" ports="4*p0+1*p015+1*p0156+1*p23+3*p5" ports_indexed="4*p0+1*p015+1*p0156+1*p23+3*p5" uops="10" uops_indexed="10" version="2.3"/>
        <IACA TP="3.69" TP_ports="4.00" ports="4*p0+1*p015+1*p0156+1*p23+3*p5" uops="10" version="3.0"/>
        <measurement TP_loop="5.00" TP_ports="3.00" TP_unrolled="5.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+1*p23+3*p5" uops="9" uops_MITE="4" uops_MS="5" uops_retire_slots="9">
          <latency cycles="9" start_op="1" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="16" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="16" cycles_addr_index="16" start_op="2" target_op="7"/>
          <latency cycles="15" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="15" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="4.05" TP_indexed="4.05" TP_ports="4.00" TP_ports_indexed="4.00" fusion_occurred="1" ports="4*p0+1*p015+1*p0156+1*p23+3*p5" ports_indexed="4*p0+1*p015+1*p0156+1*p23+3*p5" uops="10" uops_indexed="10" version="2.3"/>
        <IACA TP="3.69" TP_ports="4.00" ports="4*p0+1*p015+1*p0156+1*p23+3*p5" uops="10" version="3.0"/>
        <measurement TP_loop="5.00" TP_ports="3.00" TP_unrolled="5.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+1*p23+3*p5" uops="9" uops_MITE="4" uops_MS="5" uops_retire_slots="9">
          <latency cycles="9" start_op="1" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="16" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="16" cycles_addr_index="16" start_op="2" target_op="7"/>
          <latency cycles="15" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="15" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="5.00" TP_ports="3.00" TP_unrolled="5.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+1*p23+3*p5" uops="9" uops_MITE="4" uops_MS="5" uops_retire_slots="9">
          <latency cycles="9" start_op="1" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="16" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="16" cycles_addr_index="16" start_op="2" target_op="7"/>
          <latency cycles="15" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="15" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="5.00" TP_ports="3.00" TP_unrolled="5.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+1*p23+3*p5" uops="9" uops_MITE="4" uops_MS="5" uops_retire_slots="9">
          <latency cycles="9" start_op="1" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="16" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="16" cycles_addr_index="16" start_op="2" target_op="7"/>
          <latency cycles="15" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="15" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="5.00" TP_ports="3.00" TP_unrolled="5.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+2*p015+1*p06+1*p1+1*p23+1*p5" uops="9" uops_MITE="4" uops_MS="5" uops_retire_slots="9">
          <latency cycles="11" start_op="1" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="17" cycles_addr_index="17" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="16" cycles_addr_index="16" start_op="2" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="16" start_op="4" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="16" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="5.00" TP_ports="3.00" TP_unrolled="5.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+1*p23+3*p5" uops="9" uops_MITE="4" uops_MS="5" uops_retire_slots="9">
          <latency cycles="9" start_op="1" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="16" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="16" cycles_addr_index="16" start_op="2" target_op="7"/>
          <latency cycles="15" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="15" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="5.00" TP_ports="3.00" TP_unrolled="5.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+2*p015+1*p06+1*p1+1*p23+1*p5" uops="9" uops_MITE="4" uops_MS="5" uops_retire_slots="9">
          <latency cycles="11" start_op="1" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="17" cycles_addr_index="17" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="17" cycles_addr_index="17" start_op="2" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="5.00" TP_ports="3.00" TP_unrolled="5.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+2*p015+1*p06+1*p1+1*p23+1*p5" uops="9" uops_MITE="4" uops_MS="5" uops_retire_slots="9">
          <latency cycles="11" start_op="1" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="17" cycles_addr_index="17" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="17" cycles_addr_index="17" start_op="2" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="5.00" TP_ports="3.00" TP_unrolled="5.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+2*p015+1*p06+1*p1+1*p23+1*p5" uops="9" uops_MITE="4" uops_MS="5" uops_retire_slots="9">
          <latency cycles="11" start_op="1" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="17" cycles_addr_index="17" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="17" cycles_addr_index="17" start_op="2" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="7.00" TP_unrolled="7.00" uops="10">
          <latency cycles="7" start_op="1" target_op="6"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="15" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="14" cycles_addr_index="14" start_op="2" target_op="7"/>
          <latency cycles="14" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="13" start_op="4" target_op="7"/>
          <latency cycles="13" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="12" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="4.00" TP_unrolled="4.00" uops="12">
          <latency cycles="7" start_op="1" target_op="6"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="15" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="14" cycles_addr_index="14" start_op="2" target_op="7"/>
          <latency cycles="14" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="13" start_op="4" target_op="7"/>
          <latency cycles="13" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="12" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="4.00" TP_ports="1.00" TP_unrolled="4.00" ports="2*FP0123+1*FP1+1*FP3" uops="12">
          <latency cycles="6" start_op="1" target_op="6"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1" start_op="2" target_op="6"/>
          <latency cycles_addr="14" cycles_addr_index="14" start_op="2" target_op="7"/>
          <latency cycles="14" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="13" start_op="4" target_op="7"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="12" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPCMPESTRMQ" category="STTNI" cpl="3" extension="AVX" iclass="VPCMPESTRM64" iform="VPCMPESTRM64_XMMdq_XMMdq_IMMb" isa-set="AVX" string="VPCMPESTRM64 (XMM, XMM, I8)" url="uops.info/html-instr/VPCMPESTRM64_XMM_XMM_I8.html" vex="1">
      <operand idx="1" name="REG0" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="IMM0" r="1" type="imm" width="8"/>
      <operand idx="4" name="REG2" r="1" suppressed="1" type="reg">RAX</operand>
      <operand idx="5" name="REG3" r="1" suppressed="1" type="reg">RDX</operand>
      <operand idx="6" name="REG4" suppressed="1" type="reg" w="1" width="128" xtype="i32">XMM0</operand>
      <operand flag_AF="w" flag_CF="w" flag_OF="w" flag_PF="w" flag_SF="w" flag_ZF="w" idx="7" name="REG5" suppressed="1" type="flags" w="1"/>
      <architecture name="SNB">
        <measurement TP_loop="93.31" TP_ports="19.00" TP_unrolled="93.25" available_simple_decoders="0" complex_decoder="1" ports="17*p0+10*p1+1*p15+19*p5" uops="33" uops_MITE="0" uops_MS="50" uops_retire_slots="33">
          <latency cycles="94" start_op="1" target_op="6"/>
          <latency cycles="94" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="94" start_op="2" target_op="6"/>
          <latency cycles="93" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="93" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="94" start_op="4" target_op="7"/>
          <latency cycles="93" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="94" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <measurement TP_loop="92.46" TP_ports="21.00" TP_unrolled="92.33" available_simple_decoders="0" complex_decoder="1" ports="15*p0+12*p1+1*p23+21*p5" uops="49" uops_MITE="0" uops_MS="51" uops_retire_slots="33">
          <latency cycles="93" start_op="1" target_op="6"/>
          <latency cycles="92" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="92" start_op="2" target_op="6"/>
          <latency cycles="92" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="92" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="93" start_op="4" target_op="7"/>
          <latency cycles="92" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="93" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="4.00" TP_no_interiteration="4.00" TP_ports="4.00" ports="4*p0+1*p015+1*p0156+3*p5" uops="9" version="2.2"/>
        <IACA TP="4.05" TP_ports="4.00" ports="4*p0+1*p015+1*p0156+3*p5" uops="9" version="2.3"/>
        <IACA TP="3.57" TP_ports="4.00" ports="4*p0+1*p015+1*p0156+3*p5" uops="9" version="3.0"/>
        <measurement TP_loop="5.00" TP_ports="4.00" TP_unrolled="5.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+4*p5" uops="9" uops_MITE="4" uops_MS="5" uops_retire_slots="9">
          <latency cycles="11" start_op="1" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="11" start_op="2" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="15" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="15" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="4.00" TP_no_interiteration="4.00" TP_ports="4.00" ports="4*p0+1*p015+1*p0156+3*p5" uops="9" version="2.2"/>
        <IACA TP="4.05" TP_ports="4.00" ports="4*p0+1*p015+1*p0156+3*p5" uops="9" version="2.3"/>
        <IACA TP="3.55" TP_ports="4.00" ports="4*p0+1*p015+1*p0156+3*p5" uops="9" version="3.0"/>
        <measurement TP_loop="5.00" TP_ports="4.00" TP_unrolled="5.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+4*p5" uops="9" uops_MITE="4" uops_MS="5" uops_retire_slots="9">
          <latency cycles="11" start_op="1" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="11" start_op="2" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="15" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="15" start_op="4" target_op="7"/>
          <latency cycles="15" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="15" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="4.05" TP_ports="4.00" ports="4*p0+1*p015+1*p0156+3*p5" uops="9" version="2.3"/>
        <IACA TP="3.47" TP_ports="4.00" ports="4*p0+1*p015+1*p0156+3*p5" uops="9" version="3.0"/>
        <measurement TP_loop="5.00" TP_ports="4.00" TP_unrolled="5.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+4*p5" uops="9" uops_MITE="4" uops_MS="5" uops_retire_slots="9">
          <latency cycles="10" start_op="1" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="9" start_op="2" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="16" start_op="4" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="16" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="4.05" TP_ports="4.00" ports="4*p0+1*p015+1*p0156+3*p5" uops="9" version="2.3"/>
        <IACA TP="3.47" TP_ports="4.00" ports="4*p0+1*p015+1*p0156+3*p5" uops="9" version="3.0"/>
        <measurement TP_loop="5.00" TP_ports="4.00" TP_unrolled="5.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+4*p5" uops="9" uops_MITE="4" uops_MS="5" uops_retire_slots="9">
          <latency cycles="10" start_op="1" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="9" start_op="2" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="16" start_op="4" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="16" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="5.00" TP_ports="4.00" TP_unrolled="5.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+4*p5" uops="9" uops_MITE="4" uops_MS="5" uops_retire_slots="9">
          <latency cycles="10" start_op="1" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="9" start_op="2" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="16" start_op="4" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="16" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="5.00" TP_ports="4.00" TP_unrolled="5.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+4*p5" uops="9" uops_MITE="4" uops_MS="5" uops_retire_slots="9">
          <latency cycles="10" start_op="1" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="9" start_op="2" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="16" start_op="4" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="16" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="5.00" TP_ports="3.00" TP_unrolled="5.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+3*p015+1*p06+1*p1+1*p5" uops="9" uops_MITE="4" uops_MS="5" uops_retire_slots="9">
          <latency cycles="11" start_op="1" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="11" start_op="2" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="16" start_op="4" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="16" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="5.00" TP_ports="4.00" TP_unrolled="5.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p06+1*p1+4*p5" uops="9" uops_MITE="4" uops_MS="5" uops_retire_slots="9">
          <latency cycles="10" start_op="1" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="9" start_op="2" target_op="6"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="16" start_op="4" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="16" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="5.00" TP_ports="3.00" TP_unrolled="5.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+3*p015+1*p06+1*p1+1*p5" uops="9" uops_MITE="4" uops_MS="5" uops_retire_slots="9">
          <latency cycles="11" start_op="1" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="11" start_op="2" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="16" start_op="4" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="16" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="5.00" TP_ports="3.00" TP_unrolled="5.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+3*p015+1*p06+1*p1+1*p5" uops="9" uops_MITE="4" uops_MS="5" uops_retire_slots="9">
          <latency cycles="11" start_op="1" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="11" start_op="2" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="16" start_op="4" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="16" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="5.00" TP_ports="3.00" TP_unrolled="5.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+3*p015+1*p06+1*p1+1*p5" uops="9" uops_MITE="4" uops_MS="5" uops_retire_slots="9">
          <latency cycles="11" start_op="1" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="11" start_op="2" target_op="6"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="16" start_op="4" target_op="7"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="16" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="6.00" TP_unrolled="6.00" uops="8">
          <latency cycles="7" start_op="1" target_op="6"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="7" start_op="2" target_op="6"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="14" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="13" start_op="4" target_op="7"/>
          <latency cycles="13" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="12" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="3.00" TP_unrolled="3.00" uops="7">
          <latency cycles="7" start_op="1" target_op="6"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="7" start_op="2" target_op="6"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="14" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="13" start_op="4" target_op="7"/>
          <latency cycles="13" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="12" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="3.00" TP_ports="1.00" TP_unrolled="3.00" ports="3*FP0123+1*FP1" uops="7">
          <latency cycles="6" start_op="1" target_op="6"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="7"/>
          <latency cycles="6" start_op="2" target_op="6"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="7"/>
          <latency cycles="13" cycles_is_upper_bound="1" start_op="4" target_op="6"/>
          <latency cycles="13" start_op="4" target_op="7"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="5" target_op="6"/>
          <latency cycles="12" start_op="5" target_op="7"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPCMPGTB" category="AVX" cpl="3" extension="AVX" iclass="VPCMPGTB" iform="VPCMPGTB_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VPCMPGTB (XMM, XMM, M128)" summary="Compare Packed Signed Integers for Greater Than" url="uops.info/html-instr/VPCMPGTB_XMM_XMM_M128.html" url-ref="felixcloutier.com/x86/PCMPGTB:PCMPGTW:PCMPGTD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i8"/>
      <architecture name="SNB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.33" TP_unrolled="0.50" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.33" TP_unrolled="0.50" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPCMPGTB" category="AVX" cpl="3" extension="AVX" iclass="VPCMPGTB" iform="VPCMPGTB_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VPCMPGTB (XMM, XMM, XMM)" summary="Compare Packed Signed Integers for Greater Than" url="uops.info/html-instr/VPCMPGTB_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/PCMPGTB:PCMPGTW:PCMPGTD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="i8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_same_reg="0.25" latency="1" ports="1*p15" uops="1" uops_same_reg="0" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_same_reg="0.25" ports="1*p15" uops="1" uops_same_reg="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" TP_same_reg="0.24" ports="1*p15" uops="1" uops_same_reg="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_loop_same_reg="0.25" TP_ports="0.50" TP_unrolled="0.50" TP_unrolled_same_reg="0.25" ports="1*p15" uops="1" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="0">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_same_reg="0.25" latency="1" ports="1*p15" uops="1" uops_same_reg="0" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_same_reg="0.25" ports="1*p15" uops="1" uops_same_reg="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" TP_same_reg="0.24" ports="1*p15" uops="1" uops_same_reg="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_loop_same_reg="0.25" TP_ports="0.50" TP_unrolled="0.50" TP_unrolled_same_reg="0.25" ports="1*p15" uops="1" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="0">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_same_reg="0.25" latency="1" ports="1*p15" uops="1" uops_same_reg="0" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_same_reg="0.25" ports="1*p15" uops="1" uops_same_reg="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" TP_same_reg="0.24" ports="1*p15" uops="1" uops_same_reg="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" TP_same_reg="0.24" ports="1*p15" uops="1" uops_same_reg="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_same_reg="0.25" TP_ports="0.50" TP_unrolled="0.50" TP_unrolled_same_reg="0.25" ports="1*p15" uops="1" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="0">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_same_reg="0.25" ports="1*p15" uops="1" uops_same_reg="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" TP_same_reg="0.24" ports="1*p15" uops="1" uops_same_reg="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" TP_same_reg="0.24" ports="1*p15" uops="1" uops_same_reg="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_same_reg="0.25" TP_ports="0.50" TP_unrolled="0.50" TP_unrolled_same_reg="0.25" ports="1*p15" uops="1" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="0">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" TP_same_reg="0.24" ports="1*p01" uops="1" uops_same_reg="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" TP_same_reg="0.24" ports="1*p01" uops="1" uops_same_reg="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_same_reg="0.25" TP_ports="0.50" TP_unrolled="0.50" TP_unrolled_same_reg="0.25" ports="1*p01" uops="1" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="0">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" TP_same_reg="0.24" ports="1*p01" uops="1" uops_same_reg="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" TP_same_reg="0.24" ports="1*p01" uops="1" uops_same_reg="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_same_reg="0.25" TP_ports="0.50" TP_unrolled="0.50" TP_unrolled_same_reg="0.25" ports="1*p01" uops="1" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="0">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_same_reg="0.25" TP_ports="0.50" TP_unrolled="0.50" TP_unrolled_same_reg="0.25" ports="1*p01" uops="1" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="0">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_loop_same_reg="0.25" TP_ports="0.50" TP_unrolled="0.50" TP_unrolled_same_reg="0.25" ports="1*p01" uops="1" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="0">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_loop_same_reg="0.25" TP_ports="0.50" TP_unrolled="0.50" TP_unrolled_same_reg="0.25" ports="1*p01" uops="1" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="0">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_loop_same_reg="0.25" TP_ports="0.50" TP_unrolled="0.50" TP_unrolled_same_reg="0.25" ports="1*p01" uops="1" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="0">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_same_reg="0.20" TP_ports="0.50" TP_unrolled="0.50" TP_unrolled_same_reg="0.25" ports="1*p01" uops="1" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="0">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_same_reg="0.20" TP_ports="0.50" TP_unrolled="0.50" TP_unrolled_same_reg="0.25" ports="1*p01" uops="1" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="0">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_same_reg="0.20" TP_ports="0.50" TP_unrolled="0.50" TP_unrolled_same_reg="0.25" ports="1*p01" uops="1" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="0">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.33" TP_loop_same_reg="0.25" TP_ports="0.33" TP_unrolled="0.33" TP_unrolled_same_reg="0.25" ports="1*FP013" uops="1" uops_same_reg="1">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/3" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.33" TP_loop_same_reg="0.25" TP_ports="0.33" TP_unrolled="0.33" TP_unrolled_same_reg="0.25" ports="1*FP013" uops="1" uops_same_reg="1">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/3" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.17" TP_ports="0.25" TP_unrolled="0.25" TP_unrolled_same_reg="0.25" ports="1*FP0123" uops="1" uops_same_reg="1">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FP0/1/2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VPCMPGTD" category="AVX" cpl="3" extension="AVX" iclass="VPCMPGTD" iform="VPCMPGTD_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VPCMPGTD (XMM, XMM, M128)" summary="Compare Packed Signed Integers for Greater Than" url="uops.info/html-instr/VPCMPGTD_XMM_XMM_M128.html" url-ref="felixcloutier.com/x86/PCMPGTB:PCMPGTW:PCMPGTD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i32"/>
      <architecture name="SNB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.33" TP_unrolled="0.50" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.33" TP_unrolled="0.50" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPCMPGTD" category="AVX" cpl="3" extension="AVX" iclass="VPCMPGTD" iform="VPCMPGTD_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VPCMPGTD (XMM, XMM, XMM)" summary="Compare Packed Signed Integers for Greater Than" url="uops.info/html-instr/VPCMPGTD_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/PCMPGTB:PCMPGTW:PCMPGTD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_same_reg="0.25" latency="1" ports="1*p15" uops="1" uops_same_reg="0" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_same_reg="0.25" ports="1*p15" uops="1" uops_same_reg="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" TP_same_reg="0.24" ports="1*p15" uops="1" uops_same_reg="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_loop_same_reg="0.25" TP_ports="0.50" TP_unrolled="0.50" TP_unrolled_same_reg="0.25" ports="1*p15" uops="1" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="0">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_same_reg="0.25" latency="1" ports="1*p15" uops="1" uops_same_reg="0" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_same_reg="0.25" ports="1*p15" uops="1" uops_same_reg="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" TP_same_reg="0.24" ports="1*p15" uops="1" uops_same_reg="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_loop_same_reg="0.25" TP_ports="0.50" TP_unrolled="0.50" TP_unrolled_same_reg="0.25" ports="1*p15" uops="1" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="0">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_same_reg="0.25" latency="1" ports="1*p15" uops="1" uops_same_reg="0" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_same_reg="0.25" ports="1*p15" uops="1" uops_same_reg="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" TP_same_reg="0.24" ports="1*p15" uops="1" uops_same_reg="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" TP_same_reg="0.24" ports="1*p15" uops="1" uops_same_reg="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_same_reg="0.25" TP_ports="0.50" TP_unrolled="0.50" TP_unrolled_same_reg="0.25" ports="1*p15" uops="1" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="0">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_same_reg="0.25" ports="1*p15" uops="1" uops_same_reg="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" TP_same_reg="0.24" ports="1*p15" uops="1" uops_same_reg="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" TP_same_reg="0.24" ports="1*p15" uops="1" uops_same_reg="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_same_reg="0.25" TP_ports="0.50" TP_unrolled="0.50" TP_unrolled_same_reg="0.25" ports="1*p15" uops="1" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="0">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" TP_same_reg="0.24" ports="1*p01" uops="1" uops_same_reg="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" TP_same_reg="0.24" ports="1*p01" uops="1" uops_same_reg="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_same_reg="0.25" TP_ports="0.50" TP_unrolled="0.50" TP_unrolled_same_reg="0.25" ports="1*p01" uops="1" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="0">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" TP_same_reg="0.24" ports="1*p01" uops="1" uops_same_reg="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" TP_same_reg="0.24" ports="1*p01" uops="1" uops_same_reg="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_same_reg="0.25" TP_ports="0.50" TP_unrolled="0.50" TP_unrolled_same_reg="0.25" ports="1*p01" uops="1" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="0">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_same_reg="0.25" TP_ports="0.50" TP_unrolled="0.50" TP_unrolled_same_reg="0.25" ports="1*p01" uops="1" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="0">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_loop_same_reg="0.25" TP_ports="0.50" TP_unrolled="0.50" TP_unrolled_same_reg="0.25" ports="1*p01" uops="1" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="0">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_loop_same_reg="0.25" TP_ports="0.50" TP_unrolled="0.50" TP_unrolled_same_reg="0.25" ports="1*p01" uops="1" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="0">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_loop_same_reg="0.25" TP_ports="0.50" TP_unrolled="0.50" TP_unrolled_same_reg="0.25" ports="1*p01" uops="1" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="0">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_same_reg="0.20" TP_ports="0.50" TP_unrolled="0.50" TP_unrolled_same_reg="0.25" ports="1*p01" uops="1" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="0">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_same_reg="0.20" TP_ports="0.50" TP_unrolled="0.50" TP_unrolled_same_reg="0.25" ports="1*p01" uops="1" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="0">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_same_reg="0.20" TP_ports="0.50" TP_unrolled="0.50" TP_unrolled_same_reg="0.25" ports="1*p01" uops="1" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="0">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.33" TP_loop_same_reg="0.25" TP_ports="0.33" TP_unrolled="0.33" TP_unrolled_same_reg="0.25" ports="1*FP013" uops="1" uops_same_reg="1">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/3" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.33" TP_loop_same_reg="0.25" TP_ports="0.33" TP_unrolled="0.33" TP_unrolled_same_reg="0.25" ports="1*FP013" uops="1" uops_same_reg="1">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/3" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.17" TP_ports="0.25" TP_unrolled="0.25" TP_unrolled_same_reg="0.25" ports="1*FP0123" uops="1" uops_same_reg="1">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FP0/1/2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VPCMPGTQ" category="AVX" cpl="3" extension="AVX" iclass="VPCMPGTQ" iform="VPCMPGTQ_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VPCMPGTQ (XMM, XMM, M128)" summary="Compare Packed Data for Greater Than" url="uops.info/html-instr/VPCMPGTQ_XMM_XMM_M128.html" url-ref="felixcloutier.com/x86/PCMPGTQ.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i64"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="11" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="0" available_simple_decoders_indexed="0" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="11" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="0" available_simple_decoders_indexed="0" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="11" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="0" available_simple_decoders_indexed="0" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.0" latency="9.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP0" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP0" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPCMPGTQ" category="AVX" cpl="3" extension="AVX" iclass="VPCMPGTQ" iform="VPCMPGTQ_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VPCMPGTQ (XMM, XMM, XMM)" summary="Compare Packed Data for Greater Than" url="uops.info/html-instr/VPCMPGTQ_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/PCMPGTQ.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="i64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_same_reg="0.25" latency="5" ports="1*p0" uops="1" uops_same_reg="0" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_same_reg="0.25" ports="1*p0" uops="1" uops_same_reg="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" TP_same_reg="0.24" ports="1*p0" uops="1" uops_same_reg="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_same_reg="0.29" TP_ports="1.00" TP_unrolled="1.00" TP_unrolled_same_reg="1.00" available_simple_decoders="0" available_simple_decoders_same_reg="0" complex_decoder="1" complex_decoder_same_reg="1" ports="1*p0" uops="1" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="0">
          <latency cycles="5" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="5" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_same_reg="0.25" latency="5" ports="1*p0" uops="1" uops_same_reg="0" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_same_reg="0.25" ports="1*p0" uops="1" uops_same_reg="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" TP_same_reg="0.24" ports="1*p0" uops="1" uops_same_reg="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_same_reg="0.25" TP_ports="1.00" TP_unrolled="1.00" TP_unrolled_same_reg="1.00" available_simple_decoders="0" available_simple_decoders_same_reg="0" complex_decoder="1" complex_decoder_same_reg="1" ports="1*p0" uops="1" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="0">
          <latency cycles="5" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="5" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_same_reg="0.25" latency="5" ports="1*p0" uops="1" uops_same_reg="0" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_same_reg="0.25" ports="1*p0" uops="1" uops_same_reg="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" TP_same_reg="0.24" ports="1*p0" uops="1" uops_same_reg="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" TP_same_reg="0.24" ports="1*p0" uops="1" uops_same_reg="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_same_reg="0.25" TP_ports="1.00" TP_unrolled="1.00" TP_unrolled_same_reg="1.00" available_simple_decoders="0" available_simple_decoders_same_reg="0" complex_decoder="1" complex_decoder_same_reg="1" ports="1*p0" uops="1" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="0">
          <latency cycles="5" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="5" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_same_reg="0.25" ports="1*p0" uops="1" uops_same_reg="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" TP_same_reg="0.24" ports="1*p0" uops="1" uops_same_reg="1" version="2.3"/>
        <IACA TP="0.99" TP_ports="1.00" TP_same_reg="0.24" ports="1*p0" uops="1" uops_same_reg="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_same_reg="0.25" TP_ports="1.00" TP_unrolled="1.00" TP_unrolled_same_reg="0.31" ports="1*p0" uops="1" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="0">
          <latency cycles="5" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="5" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" TP_same_reg="0.24" ports="1*p5" uops="1" uops_same_reg="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" TP_same_reg="0.24" ports="1*p5" uops="1" uops_same_reg="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_same_reg="0.25" TP_ports="1.00" TP_unrolled="1.00" TP_unrolled_same_reg="0.31" ports="1*p5" uops="1" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="0">
          <latency cycles="3" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="3" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" TP_same_reg="0.24" ports="1*p5" uops="1" uops_same_reg="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" TP_same_reg="0.24" ports="1*p5" uops="1" uops_same_reg="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_same_reg="0.25" TP_ports="1.00" TP_unrolled="1.00" TP_unrolled_same_reg="0.31" ports="1*p5" uops="1" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="0">
          <latency cycles="3" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="3" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_same_reg="0.25" TP_ports="1.00" TP_unrolled="1.00" TP_unrolled_same_reg="0.31" ports="1*p5" uops="1" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="0">
          <latency cycles="3" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="3" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_loop_same_reg="0.25" TP_ports="1.00" TP_unrolled="1.00" TP_unrolled_same_reg="0.31" ports="1*p5" uops="1" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="0">
          <latency cycles="3" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="3" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_loop_same_reg="0.25" TP_ports="1.00" TP_unrolled="1.00" TP_unrolled_same_reg="0.31" ports="1*p5" uops="1" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="0">
          <latency cycles="3" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="3" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_loop_same_reg="0.25" TP_ports="1.00" TP_unrolled="1.00" TP_unrolled_same_reg="0.31" ports="1*p5" uops="1" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="0">
          <latency cycles="3" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="3" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_loop_same_reg="0.20" TP_ports="1.00" TP_unrolled="1.00" TP_unrolled_same_reg="0.31" ports="1*p5" uops="1" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="0">
          <latency cycles="3" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="3" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.0" latency="3.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_loop_same_reg="0.20" TP_ports="1.00" TP_unrolled="1.00" TP_unrolled_same_reg="0.31" ports="1*p5" uops="1" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="0">
          <latency cycles="3" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="3" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_loop_same_reg="0.20" TP_ports="1.00" TP_unrolled="1.00" TP_unrolled_same_reg="0.31" ports="1*p5" uops="1" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="0">
          <latency cycles="3" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="3" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_loop_same_reg="0.38" TP_ports="1.00" TP_unrolled="1.00" TP_unrolled_same_reg="0.31" ports="1*FP0" uops="1" uops_same_reg="1">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/3" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_loop_same_reg="0.33" TP_ports="1.00" TP_unrolled="1.00" TP_unrolled_same_reg="0.31" ports="1*FP0" uops="1" uops_same_reg="1">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/3" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.17" TP_ports="0.25" TP_unrolled="0.31" TP_unrolled_same_reg="0.31" ports="1*FP0123" uops="1" uops_same_reg="1">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/1" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VPCMPGTW" category="AVX" cpl="3" extension="AVX" iclass="VPCMPGTW" iform="VPCMPGTW_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VPCMPGTW (XMM, XMM, M128)" summary="Compare Packed Signed Integers for Greater Than" url="uops.info/html-instr/VPCMPGTW_XMM_XMM_M128.html" url-ref="felixcloutier.com/x86/PCMPGTB:PCMPGTW:PCMPGTD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i16"/>
      <architecture name="SNB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.33" TP_unrolled="0.50" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.33" TP_unrolled="0.50" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPCMPGTW" category="AVX" cpl="3" extension="AVX" iclass="VPCMPGTW" iform="VPCMPGTW_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VPCMPGTW (XMM, XMM, XMM)" summary="Compare Packed Signed Integers for Greater Than" url="uops.info/html-instr/VPCMPGTW_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/PCMPGTB:PCMPGTW:PCMPGTD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_same_reg="0.25" latency="1" ports="1*p15" uops="1" uops_same_reg="0" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_same_reg="0.25" ports="1*p15" uops="1" uops_same_reg="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" TP_same_reg="0.24" ports="1*p15" uops="1" uops_same_reg="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_loop_same_reg="0.25" TP_ports="0.50" TP_unrolled="0.50" TP_unrolled_same_reg="0.25" ports="1*p15" uops="1" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="0">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_same_reg="0.25" latency="1" ports="1*p15" uops="1" uops_same_reg="0" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_same_reg="0.25" ports="1*p15" uops="1" uops_same_reg="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" TP_same_reg="0.24" ports="1*p15" uops="1" uops_same_reg="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_loop_same_reg="0.25" TP_ports="0.50" TP_unrolled="0.50" TP_unrolled_same_reg="0.25" ports="1*p15" uops="1" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="0">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_same_reg="0.25" latency="1" ports="1*p15" uops="1" uops_same_reg="0" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_same_reg="0.25" ports="1*p15" uops="1" uops_same_reg="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" TP_same_reg="0.24" ports="1*p15" uops="1" uops_same_reg="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" TP_same_reg="0.24" ports="1*p15" uops="1" uops_same_reg="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_same_reg="0.25" TP_ports="0.50" TP_unrolled="0.50" TP_unrolled_same_reg="0.25" ports="1*p15" uops="1" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="0">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_same_reg="0.25" ports="1*p15" uops="1" uops_same_reg="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" TP_same_reg="0.24" ports="1*p15" uops="1" uops_same_reg="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" TP_same_reg="0.24" ports="1*p15" uops="1" uops_same_reg="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_same_reg="0.25" TP_ports="0.50" TP_unrolled="0.50" TP_unrolled_same_reg="0.25" ports="1*p15" uops="1" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="0">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" TP_same_reg="0.24" ports="1*p01" uops="1" uops_same_reg="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" TP_same_reg="0.24" ports="1*p01" uops="1" uops_same_reg="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_same_reg="0.25" TP_ports="0.50" TP_unrolled="0.50" TP_unrolled_same_reg="0.25" ports="1*p01" uops="1" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="0">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" TP_same_reg="0.24" ports="1*p01" uops="1" uops_same_reg="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" TP_same_reg="0.24" ports="1*p01" uops="1" uops_same_reg="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_same_reg="0.25" TP_ports="0.50" TP_unrolled="0.50" TP_unrolled_same_reg="0.25" ports="1*p01" uops="1" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="0">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_same_reg="0.25" TP_ports="0.50" TP_unrolled="0.50" TP_unrolled_same_reg="0.25" ports="1*p01" uops="1" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="0">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_loop_same_reg="0.25" TP_ports="0.50" TP_unrolled="0.50" TP_unrolled_same_reg="0.25" ports="1*p01" uops="1" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="0">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_loop_same_reg="0.25" TP_ports="0.50" TP_unrolled="0.50" TP_unrolled_same_reg="0.25" ports="1*p01" uops="1" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="0">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_loop_same_reg="0.25" TP_ports="0.50" TP_unrolled="0.50" TP_unrolled_same_reg="0.25" ports="1*p01" uops="1" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="0">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_same_reg="0.20" TP_ports="0.50" TP_unrolled="0.50" TP_unrolled_same_reg="0.25" ports="1*p01" uops="1" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="0">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_same_reg="0.20" TP_ports="0.50" TP_unrolled="0.50" TP_unrolled_same_reg="0.25" ports="1*p01" uops="1" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="0">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_same_reg="0.20" TP_ports="0.50" TP_unrolled="0.50" TP_unrolled_same_reg="0.25" ports="1*p01" uops="1" uops_MITE="1" uops_MITE_same_reg="1" uops_MS="0" uops_MS_same_reg="0" uops_retire_slots="1" uops_retire_slots_same_reg="1" uops_same_reg="0">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.33" TP_loop_same_reg="0.25" TP_ports="0.33" TP_unrolled="0.33" TP_unrolled_same_reg="0.25" ports="1*FP013" uops="1" uops_same_reg="1">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/3" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.33" TP_loop_same_reg="0.25" TP_ports="0.33" TP_unrolled="0.33" TP_unrolled_same_reg="0.25" ports="1*FP013" uops="1" uops_same_reg="1">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/3" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.25" TP_loop_same_reg="0.17" TP_ports="0.25" TP_unrolled="0.25" TP_unrolled_same_reg="0.25" ports="1*FP0123" uops="1" uops_same_reg="1">
          <latency cycles="1" cycles_same_reg="0" start_op="2" target_op="1"/>
          <latency cycles="1" cycles_same_reg="0" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FP0/1/2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VPCMPISTRI" category="STTNI" cpl="3" extension="AVX" iclass="VPCMPISTRI" iform="VPCMPISTRI_XMMdq_MEMdq_IMMb" isa-set="AVX" string="VPCMPISTRI (XMM, M128, I8)" summary="Packed Compare Implicit Length Strings, Return Index" url="uops.info/html-instr/VPCMPISTRI_XMM_M128_I8.html" url-ref="felixcloutier.com/x86/PCMPISTRI.html" vex="1">
      <operand idx="1" name="REG0" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i32"/>
      <operand idx="3" name="IMM0" r="1" type="imm" width="8"/>
      <operand idx="4" name="REG1" suppressed="1" type="reg" w="1">ECX</operand>
      <operand flag_AF="w" flag_CF="w" flag_OF="w" flag_PF="w" flag_SF="w" flag_ZF="w" idx="5" name="REG2" suppressed="1" type="flags" w="1"/>
      <architecture name="SNB">
        <IACA TP="3.00" TP_no_interiteration="3.00" TP_ports="3.00" latency="17" ports="3*p0+1*p23" uops="4" version="2.1"/>
        <IACA TP="3.00" TP_no_interiteration="3.00" TP_ports="3.00" ports="3*p0+1*p23" uops="4" version="2.2"/>
        <IACA TP="3.00" TP_ports="3.00" ports="3*p0+1*p23" uops="4" version="2.3"/>
        <measurement TP_loop="3.00" TP_ports="3.00" TP_unrolled="3.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles_addr="17" cycles_addr_index="17" cycles_mem="28" cycles_mem_is_upper_bound="1" start_op="2" target_op="4"/>
          <latency cycles_addr="17" cycles_addr_index="17" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="3.00" TP_no_interiteration="3.00" TP_ports="3.00" latency="17" ports="3*p0+1*p23" uops="4" version="2.1"/>
        <IACA TP="3.00" TP_no_interiteration="3.00" TP_ports="3.00" ports="3*p0+1*p23" uops="4" version="2.2"/>
        <IACA TP="3.00" TP_ports="3.00" ports="3*p0+1*p23" uops="4" version="2.3"/>
        <measurement TP_loop="3.00" TP_ports="3.00" TP_unrolled="3.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles_addr="17" cycles_addr_index="17" cycles_mem="27" cycles_mem_is_upper_bound="1" start_op="2" target_op="4"/>
          <latency cycles_addr="17" cycles_addr_index="17" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="17" ports="2*p0+1*p1+1*p23" uops="4" version="2.1"/>
        <IACA TP="3.00" TP_no_interiteration="3.00" TP_ports="3.00" ports="3*p0+1*p23" uops="4" version="2.2"/>
        <IACA TP="3.00" TP_ports="3.00" ports="3*p0+1*p23" uops="4" version="2.3"/>
        <IACA TP="3.00" TP_ports="3.00" ports="3*p0+1*p23" uops="4" version="3.0"/>
        <measurement TP_loop="3.00" TP_ports="3.00" TP_unrolled="3.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles_addr="17" cycles_addr_index="17" cycles_mem="26" cycles_mem_is_upper_bound="1" start_op="2" target_op="4"/>
          <latency cycles_addr="17" cycles_addr_index="17" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="3.00" TP_no_interiteration="3.00" TP_ports="3.00" ports="3*p0+1*p23" uops="4" version="2.2"/>
        <IACA TP="3.00" TP_ports="3.00" ports="3*p0+1*p23" uops="4" version="2.3"/>
        <IACA TP="3.00" TP_ports="3.00" ports="3*p0+1*p23" uops="4" version="3.0"/>
        <measurement TP_loop="3.00" TP_ports="3.00" TP_unrolled="3.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles_addr="17" cycles_addr_index="17" cycles_mem="28" cycles_mem_is_upper_bound="1" start_op="2" target_op="4"/>
          <latency cycles_addr="17" cycles_addr_index="17" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="3.00" TP_ports="3.00" ports="3*p0+1*p23" uops="4" version="2.3"/>
        <IACA TP="3.00" TP_ports="3.00" ports="3*p0+1*p23" uops="4" version="3.0"/>
        <measurement TP_loop="3.00" TP_ports="3.00" TP_unrolled="3.00" available_simple_decoders="1" complex_decoder="1" ports="3*p0+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles_addr="16" cycles_addr_index="16" cycles_mem="25" cycles_mem_is_upper_bound="1" start_op="2" target_op="4"/>
          <latency cycles_addr="16" cycles_addr_index="16" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="3.00" TP_ports="3.00" ports="3*p0+1*p23" uops="4" version="2.3"/>
        <IACA TP="3.00" TP_ports="3.00" ports="3*p0+1*p23" uops="4" version="3.0"/>
        <measurement TP_loop="3.00" TP_ports="3.00" TP_unrolled="3.00" available_simple_decoders="1" complex_decoder="1" ports="3*p0+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles_addr="16" cycles_addr_index="16" cycles_mem="25" cycles_mem_is_upper_bound="1" start_op="2" target_op="4"/>
          <latency cycles_addr="16" cycles_addr_index="16" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="3.00" TP_ports="3.00" TP_unrolled="3.00" available_simple_decoders="1" complex_decoder="1" ports="3*p0+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles_addr="16" cycles_addr_index="16" cycles_mem="25" cycles_mem_is_upper_bound="1" start_op="2" target_op="4"/>
          <latency cycles_addr="16" cycles_addr_index="16" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="3.00" TP_ports="3.00" TP_unrolled="3.00" available_simple_decoders="1" complex_decoder="1" ports="3*p0+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles_addr="16" cycles_addr_index="16" cycles_mem="25" cycles_mem_is_upper_bound="1" start_op="2" target_op="4"/>
          <latency cycles_addr="16" cycles_addr_index="16" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="3.00" TP_ports="3.00" TP_unrolled="3.00" available_simple_decoders="1" complex_decoder="1" ports="3*p0+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles_addr="16" cycles_addr_index="16" cycles_mem="25" cycles_mem_is_upper_bound="1" start_op="2" target_op="4"/>
          <latency cycles_addr="16" cycles_addr_index="16" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="3.00" TP_ports="3.00" TP_unrolled="3.00" available_simple_decoders="1" complex_decoder="1" ports="3*p0+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles_addr="16" cycles_addr_index="16" cycles_mem="25" cycles_mem_is_upper_bound="1" start_op="2" target_op="4"/>
          <latency cycles_addr="16" cycles_addr_index="16" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="3.00" TP_ports="3.00" TP_unrolled="3.00" available_simple_decoders="1" complex_decoder="1" ports="3*p0+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles_addr="16" cycles_addr_index="16" cycles_mem="30" cycles_mem_is_upper_bound="1" start_op="2" target_op="4"/>
          <latency cycles_addr="16" cycles_addr_index="16" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="3.00" TP_ports="3.00" TP_unrolled="3.00" available_simple_decoders="1" complex_decoder="1" ports="3*p0+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles_addr="16" cycles_addr_index="16" cycles_mem="30" cycles_mem_is_upper_bound="1" start_op="2" target_op="4"/>
          <latency cycles_addr="16" cycles_addr_index="16" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="3.00" TP_ports="3.00" TP_unrolled="3.00" available_simple_decoders="1" complex_decoder="1" ports="3*p0+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles_addr="16" cycles_addr_index="16" cycles_mem="30" cycles_mem_is_upper_bound="1" start_op="2" target_op="4"/>
          <latency cycles_addr="16" cycles_addr_index="16" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="3">
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="4"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_mem="25" cycles_mem_is_upper_bound="1" start_op="2" target_op="4"/>
          <latency cycles_addr="14" cycles_addr_index="14" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="3">
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="4"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_mem="23" cycles_mem_is_upper_bound="1" start_op="2" target_op="4"/>
          <latency cycles_addr="14" cycles_addr_index="14" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="2.00" TP_ports="1.00" TP_unrolled="2.00" ports="1*FP1" uops="4">
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="4"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_mem="27" cycles_mem_is_upper_bound="1" start_op="2" target_op="4"/>
          <latency cycles_addr="14" cycles_addr_index="14" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPCMPISTRI" category="STTNI" cpl="3" extension="AVX" iclass="VPCMPISTRI" iform="VPCMPISTRI_XMMdq_XMMdq_IMMb" isa-set="AVX" string="VPCMPISTRI (XMM, XMM, I8)" summary="Packed Compare Implicit Length Strings, Return Index" url="uops.info/html-instr/VPCMPISTRI_XMM_XMM_I8.html" url-ref="felixcloutier.com/x86/PCMPISTRI.html" vex="1">
      <operand idx="1" name="REG0" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="IMM0" r="1" type="imm" width="8"/>
      <operand idx="4" name="REG2" suppressed="1" type="reg" w="1">ECX</operand>
      <operand flag_AF="w" flag_CF="w" flag_OF="w" flag_PF="w" flag_SF="w" flag_ZF="w" idx="5" name="REG3" suppressed="1" type="flags" w="1"/>
      <architecture name="SNB">
        <IACA TP="3.00" TP_no_interiteration="3.00" TP_ports="3.00" latency="11" ports="3*p0" uops="3" version="2.1"/>
        <IACA TP="3.00" TP_no_interiteration="3.00" TP_ports="3.00" ports="3*p0" uops="3" version="2.2"/>
        <IACA TP="3.00" TP_ports="3.00" ports="3*p0" uops="3" version="2.3"/>
        <measurement TP_loop="3.00" TP_ports="3.00" TP_unrolled="3.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="3.00" TP_no_interiteration="3.00" TP_ports="3.00" latency="11" ports="3*p0" uops="3" version="2.1"/>
        <IACA TP="3.00" TP_no_interiteration="3.00" TP_ports="3.00" ports="3*p0" uops="3" version="2.2"/>
        <IACA TP="3.00" TP_ports="3.00" ports="3*p0" uops="3" version="2.3"/>
        <measurement TP_loop="3.00" TP_ports="3.00" TP_unrolled="3.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="11" ports="2*p0+1*p1" uops="3" version="2.1"/>
        <IACA TP="3.00" TP_no_interiteration="3.00" TP_ports="3.00" ports="3*p0" uops="3" version="2.2"/>
        <IACA TP="3.00" TP_ports="3.00" ports="3*p0" uops="3" version="2.3"/>
        <IACA TP="2.92" TP_ports="3.00" ports="3*p0" uops="3" version="3.0"/>
        <measurement TP_loop="3.00" TP_ports="3.00" TP_unrolled="3.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="3.00" TP_no_interiteration="3.00" TP_ports="3.00" ports="3*p0" uops="3" version="2.2"/>
        <IACA TP="3.00" TP_ports="3.00" ports="3*p0" uops="3" version="2.3"/>
        <IACA TP="2.91" TP_ports="3.00" ports="3*p0" uops="3" version="3.0"/>
        <measurement TP_loop="3.00" TP_ports="3.00" TP_unrolled="3.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="3.00" TP_ports="3.00" ports="3*p0" uops="3" version="2.3"/>
        <IACA TP="2.92" TP_ports="3.00" ports="3*p0" uops="3" version="3.0"/>
        <measurement TP_loop="3.00" TP_ports="3.00" TP_unrolled="3.00" available_simple_decoders="2" complex_decoder="1" ports="3*p0" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="4"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="3.00" TP_ports="3.00" ports="3*p0" uops="3" version="2.3"/>
        <IACA TP="2.92" TP_ports="3.00" ports="3*p0" uops="3" version="3.0"/>
        <measurement TP_loop="3.00" TP_ports="3.00" TP_unrolled="3.00" available_simple_decoders="2" complex_decoder="1" ports="3*p0" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="4"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="3.00" TP_ports="3.00" TP_unrolled="3.00" available_simple_decoders="2" complex_decoder="1" ports="3*p0" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="4"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="3.00" TP_ports="3.00" TP_unrolled="3.00" available_simple_decoders="2" complex_decoder="1" ports="3*p0" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="4"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="3.00" TP_ports="3.00" TP_unrolled="3.00" available_simple_decoders="2" complex_decoder="1" ports="3*p0" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="3.00" TP_ports="3.00" TP_unrolled="3.00" available_simple_decoders="2" complex_decoder="1" ports="3*p0" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="4"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="3.00" TP_ports="3.00" TP_unrolled="3.00" available_simple_decoders="2" complex_decoder="1" ports="3*p0" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="5"/>
        </measurement>
        <doc TP="3.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="3.00" TP_ports="3.00" TP_unrolled="3.00" available_simple_decoders="2" complex_decoder="1" ports="3*p0" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="3.00" TP_ports="3.00" TP_unrolled="3.00" available_simple_decoders="2" complex_decoder="1" ports="3*p0" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="2">
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="4"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="4"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="5"/>
        </measurement>
        <doc TP="2.00" latency="2" uops="ucode"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="2">
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="4"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="4"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="5"/>
        </measurement>
        <doc uops="ucode"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="2.00" TP_ports="0.50" TP_unrolled="2.00" ports="1*FP01" uops="4">
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="4"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="4"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="5"/>
        </measurement>
        <doc uops="ucode"/>
      </architecture>
    </instruction>
    <instruction asm="VPCMPISTRM" category="STTNI" cpl="3" extension="AVX" iclass="VPCMPISTRM" iform="VPCMPISTRM_XMMdq_MEMdq_IMMb" isa-set="AVX" string="VPCMPISTRM (XMM, M128, I8)" summary="Packed Compare Implicit Length Strings, Return Mask" url="uops.info/html-instr/VPCMPISTRM_XMM_M128_I8.html" url-ref="felixcloutier.com/x86/PCMPISTRM.html" vex="1">
      <operand idx="1" name="REG0" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i32"/>
      <operand idx="3" name="IMM0" r="1" type="imm" width="8"/>
      <operand idx="4" name="REG1" suppressed="1" type="reg" w="1" width="128" xtype="i32">XMM0</operand>
      <operand flag_AF="w" flag_CF="w" flag_OF="w" flag_PF="w" flag_SF="w" flag_ZF="w" idx="5" name="REG2" suppressed="1" type="flags" w="1"/>
      <architecture name="SNB">
        <IACA TP="3.00" TP_no_interiteration="3.00" TP_ports="3.00" latency="17" ports="3*p0+1*p23" uops="4" version="2.1"/>
        <IACA TP="3.00" TP_indexed="3.00" TP_no_interiteration="3.00" TP_ports="3.00" TP_ports_indexed="3.00" fusion_occurred="1" ports="3*p0+1*p23" ports_indexed="3*p0+1*p23" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="3.00" TP_indexed="3.00" TP_ports="3.00" TP_ports_indexed="3.00" fusion_occurred="1" ports="3*p0+1*p23" ports_indexed="3*p0+1*p23" uops="4" uops_indexed="4" version="2.3"/>
        <measurement TP_loop="3.00" TP_ports="3.00" TP_unrolled="3.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="10" start_op="1" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles_addr="16" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="15" cycles_mem_is_upper_bound="1" start_op="2" target_op="4"/>
          <latency cycles_addr="17" cycles_addr_index="17" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="3.00" TP_no_interiteration="3.00" TP_ports="3.00" latency="17" ports="3*p0+1*p23" uops="4" version="2.1"/>
        <IACA TP="3.00" TP_indexed="3.00" TP_no_interiteration="3.00" TP_ports="3.00" TP_ports_indexed="3.00" fusion_occurred="1" ports="3*p0+1*p23" ports_indexed="3*p0+1*p23" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="3.00" TP_indexed="3.00" TP_ports="3.00" TP_ports_indexed="3.00" fusion_occurred="1" ports="3*p0+1*p23" ports_indexed="3*p0+1*p23" uops="4" uops_indexed="4" version="2.3"/>
        <measurement TP_loop="3.00" TP_ports="3.00" TP_unrolled="3.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="10" start_op="1" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles_addr="16" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="15" cycles_mem_is_upper_bound="1" start_op="2" target_op="4"/>
          <latency cycles_addr="17" cycles_addr_index="17" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="17" ports="2*p0+1*p1+1*p23" uops="4" version="2.1"/>
        <IACA TP="3.00" TP_indexed="3.00" TP_no_interiteration="3.00" TP_ports="3.00" TP_ports_indexed="3.00" fusion_occurred="1" ports="3*p0+1*p23" ports_indexed="3*p0+1*p23" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="3.00" TP_indexed="3.00" TP_ports="3.00" TP_ports_indexed="3.00" fusion_occurred="1" ports="3*p0+1*p23" ports_indexed="3*p0+1*p23" uops="4" uops_indexed="4" version="2.3"/>
        <IACA TP="3.00" TP_ports="3.00" ports="3*p0+1*p23" uops="4" version="3.0"/>
        <measurement TP_loop="3.00" TP_ports="3.00" TP_unrolled="3.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="10" start_op="1" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles_addr="17" cycles_addr_index="17" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="15" cycles_mem_is_upper_bound="1" start_op="2" target_op="4"/>
          <latency cycles_addr="17" cycles_addr_index="17" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="3.00" TP_indexed="3.00" TP_no_interiteration="3.00" TP_ports="3.00" TP_ports_indexed="3.00" fusion_occurred="1" ports="3*p0+1*p23" ports_indexed="3*p0+1*p23" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="3.00" TP_indexed="3.00" TP_ports="3.00" TP_ports_indexed="3.00" fusion_occurred="1" ports="3*p0+1*p23" ports_indexed="3*p0+1*p23" uops="4" uops_indexed="4" version="2.3"/>
        <IACA TP="3.00" TP_ports="3.00" ports="3*p0+1*p23" uops="4" version="3.0"/>
        <measurement TP_loop="3.00" TP_ports="3.00" TP_unrolled="3.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="10" start_op="1" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles_addr="16" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="15" cycles_mem_is_upper_bound="1" start_op="2" target_op="4"/>
          <latency cycles_addr="17" cycles_addr_index="17" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="3.00" TP_indexed="3.00" TP_ports="3.00" TP_ports_indexed="3.00" fusion_occurred="1" ports="3*p0+1*p23" ports_indexed="3*p0+1*p23" uops="4" uops_indexed="4" version="2.3"/>
        <IACA TP="3.00" TP_ports="3.00" ports="3*p0+1*p23" uops="4" version="3.0"/>
        <measurement TP_loop="3.00" TP_ports="3.00" TP_unrolled="3.00" available_simple_decoders="1" complex_decoder="1" ports="3*p0+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="8" start_op="1" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles_addr="15" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="4"/>
          <latency cycles_addr="16" cycles_addr_index="16" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="3.00" TP_indexed="3.00" TP_ports="3.00" TP_ports_indexed="3.00" fusion_occurred="1" ports="3*p0+1*p23" ports_indexed="3*p0+1*p23" uops="4" uops_indexed="4" version="2.3"/>
        <IACA TP="3.00" TP_ports="3.00" ports="3*p0+1*p23" uops="4" version="3.0"/>
        <measurement TP_loop="3.00" TP_ports="3.00" TP_unrolled="3.00" available_simple_decoders="1" complex_decoder="1" ports="3*p0+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="8" start_op="1" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles_addr="15" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="4"/>
          <latency cycles_addr="16" cycles_addr_index="16" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="3.00" TP_ports="3.00" TP_unrolled="3.00" available_simple_decoders="1" complex_decoder="1" ports="3*p0+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="8" start_op="1" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles_addr="16" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="4"/>
          <latency cycles_addr="16" cycles_addr_index="16" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="3.00" TP_ports="3.00" TP_unrolled="3.00" available_simple_decoders="1" complex_decoder="1" ports="3*p0+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="8" start_op="1" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles_addr="16" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="4"/>
          <latency cycles_addr="16" cycles_addr_index="16" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="3.00" TP_ports="3.00" TP_unrolled="3.00" available_simple_decoders="1" complex_decoder="1" ports="3*p0+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="8" start_op="1" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles_addr="16" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="4"/>
          <latency cycles_addr="16" cycles_addr_index="16" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="3.00" TP_ports="3.00" TP_unrolled="3.00" available_simple_decoders="1" complex_decoder="1" ports="3*p0+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="8" start_op="1" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles_addr="15" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="4"/>
          <latency cycles_addr="16" cycles_addr_index="16" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="3.00" TP_ports="3.00" TP_unrolled="3.00" available_simple_decoders="1" complex_decoder="1" ports="3*p0+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="8" start_op="1" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles_addr="16" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="4"/>
          <latency cycles_addr="16" cycles_addr_index="16" start_op="2" target_op="5"/>
        </measurement>
        <doc TP="3.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="3.00" TP_ports="3.00" TP_unrolled="3.00" available_simple_decoders="1" complex_decoder="1" ports="3*p0+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="8" start_op="1" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles_addr="16" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="4"/>
          <latency cycles_addr="16" cycles_addr_index="16" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="3.00" TP_ports="3.00" TP_unrolled="3.00" available_simple_decoders="1" complex_decoder="1" ports="3*p0+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="8" start_op="1" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles_addr="16" cycles_addr_index="16" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="2" target_op="4"/>
          <latency cycles_addr="16" cycles_addr_index="16" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="7" start_op="1" target_op="4"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles_addr="15" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1" start_op="2" target_op="4"/>
          <latency cycles_addr="14" cycles_addr_index="14" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="7" start_op="1" target_op="4"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles_addr="15" cycles_addr_index="15" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1" start_op="2" target_op="4"/>
          <latency cycles_addr="14" cycles_addr_index="14" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="2.00" TP_ports="0.67" TP_unrolled="2.00" ports="1*FP01+1*FP12" uops="4">
          <latency cycles="6" start_op="1" target_op="4"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles_addr="14" cycles_addr_index="14" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="14" cycles_mem_is_upper_bound="1" start_op="2" target_op="4"/>
          <latency cycles_addr="14" cycles_addr_index="14" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPCMPISTRM" category="STTNI" cpl="3" extension="AVX" iclass="VPCMPISTRM" iform="VPCMPISTRM_XMMdq_XMMdq_IMMb" isa-set="AVX" string="VPCMPISTRM (XMM, XMM, I8)" summary="Packed Compare Implicit Length Strings, Return Mask" url="uops.info/html-instr/VPCMPISTRM_XMM_XMM_I8.html" url-ref="felixcloutier.com/x86/PCMPISTRM.html" vex="1">
      <operand idx="1" name="REG0" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="IMM0" r="1" type="imm" width="8"/>
      <operand idx="4" name="REG2" suppressed="1" type="reg" w="1" width="128" xtype="i32">XMM0</operand>
      <operand flag_AF="w" flag_CF="w" flag_OF="w" flag_PF="w" flag_SF="w" flag_ZF="w" idx="5" name="REG3" suppressed="1" type="flags" w="1"/>
      <architecture name="SNB">
        <IACA TP="3.00" TP_no_interiteration="3.00" TP_ports="3.00" latency="11" ports="3*p0" uops="3" version="2.1"/>
        <IACA TP="3.00" TP_no_interiteration="3.00" TP_ports="3.00" ports="3*p0" uops="3" version="2.2"/>
        <IACA TP="3.00" TP_ports="3.00" ports="3*p0" uops="3" version="2.3"/>
        <measurement TP_loop="3.00" TP_ports="3.00" TP_unrolled="3.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="10" start_op="1" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles="10" start_op="2" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="3.00" TP_no_interiteration="3.00" TP_ports="3.00" latency="11" ports="3*p0" uops="3" version="2.1"/>
        <IACA TP="3.00" TP_no_interiteration="3.00" TP_ports="3.00" ports="3*p0" uops="3" version="2.2"/>
        <IACA TP="3.00" TP_ports="3.00" ports="3*p0" uops="3" version="2.3"/>
        <measurement TP_loop="3.00" TP_ports="3.00" TP_unrolled="3.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="10" start_op="1" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles="10" start_op="2" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="11" ports="2*p0+1*p1" uops="3" version="2.1"/>
        <IACA TP="3.00" TP_no_interiteration="3.00" TP_ports="3.00" ports="3*p0" uops="3" version="2.2"/>
        <IACA TP="3.00" TP_ports="3.00" ports="3*p0" uops="3" version="2.3"/>
        <IACA TP="2.92" TP_ports="3.00" ports="3*p0" uops="3" version="3.0"/>
        <measurement TP_loop="3.00" TP_ports="3.00" TP_unrolled="3.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="10" start_op="1" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles="10" start_op="2" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="3.00" TP_no_interiteration="3.00" TP_ports="3.00" ports="3*p0" uops="3" version="2.2"/>
        <IACA TP="3.00" TP_ports="3.00" ports="3*p0" uops="3" version="2.3"/>
        <IACA TP="2.91" TP_ports="3.00" ports="3*p0" uops="3" version="3.0"/>
        <measurement TP_loop="3.00" TP_ports="3.00" TP_unrolled="3.00" available_simple_decoders="0" complex_decoder="1" ports="3*p0" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="10" start_op="1" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles="10" start_op="2" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="3.00" TP_ports="3.00" ports="3*p0" uops="3" version="2.3"/>
        <IACA TP="2.92" TP_ports="3.00" ports="3*p0" uops="3" version="3.0"/>
        <measurement TP_loop="3.00" TP_ports="3.00" TP_unrolled="3.00" available_simple_decoders="2" complex_decoder="1" ports="3*p0" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="9" start_op="1" target_op="4"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles="8" start_op="2" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="3.00" TP_ports="3.00" ports="3*p0" uops="3" version="2.3"/>
        <IACA TP="2.92" TP_ports="3.00" ports="3*p0" uops="3" version="3.0"/>
        <measurement TP_loop="3.00" TP_ports="3.00" TP_unrolled="3.00" available_simple_decoders="2" complex_decoder="1" ports="3*p0" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="9" start_op="1" target_op="4"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles="8" start_op="2" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="3.00" TP_ports="3.00" TP_unrolled="3.00" available_simple_decoders="2" complex_decoder="1" ports="3*p0" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="9" start_op="1" target_op="4"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles="8" start_op="2" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="3.00" TP_ports="3.00" TP_unrolled="3.00" available_simple_decoders="2" complex_decoder="1" ports="3*p0" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="9" start_op="1" target_op="4"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles="8" start_op="2" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="3.00" TP_ports="3.00" TP_unrolled="3.00" available_simple_decoders="2" complex_decoder="1" ports="3*p0" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="8" start_op="1" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles="8" start_op="2" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="3.00" TP_ports="3.00" TP_unrolled="3.00" available_simple_decoders="2" complex_decoder="1" ports="3*p0" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="9" start_op="1" target_op="4"/>
          <latency cycles="12" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles="8" start_op="2" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="3.00" TP_ports="3.00" TP_unrolled="3.00" available_simple_decoders="2" complex_decoder="1" ports="3*p0" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="8" start_op="1" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles="8" start_op="2" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="5"/>
        </measurement>
        <doc TP="3.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="3.00" TP_ports="3.00" TP_unrolled="3.00" available_simple_decoders="2" complex_decoder="1" ports="3*p0" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="8" start_op="1" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles="8" start_op="2" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="3.00" TP_ports="3.00" TP_unrolled="3.00" available_simple_decoders="2" complex_decoder="1" ports="3*p0" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="8" start_op="1" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles="8" start_op="2" target_op="4"/>
          <latency cycles="11" cycles_is_upper_bound="1" start_op="2" target_op="5"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="3">
          <latency cycles="7" start_op="1" target_op="4"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles="7" start_op="2" target_op="4"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="5"/>
        </measurement>
        <doc TP="2.00" latency="2" uops="ucode"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="3">
          <latency cycles="7" start_op="1" target_op="4"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles="7" start_op="2" target_op="4"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="5"/>
        </measurement>
        <doc uops="ucode"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="2.00" TP_ports="0.67" TP_unrolled="2.00" ports="1*FP01+1*FP12" uops="3">
          <latency cycles="6" start_op="1" target_op="4"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="1" target_op="5"/>
          <latency cycles="6" start_op="2" target_op="4"/>
          <latency cycles="10" cycles_is_upper_bound="1" start_op="2" target_op="5"/>
        </measurement>
        <doc uops="ucode"/>
      </architecture>
    </instruction>
    <instruction asm="VPERM2F128" category="AVX" cpl="3" extension="AVX" iclass="VPERM2F128" iform="VPERM2F128_YMMqq_YMMqq_MEMqq_IMMb" isa-set="AVX" string="VPERM2F128 (YMM, YMM, M256, I8)" summary="Permute Floating-Point Values" url="uops.info/html-instr/VPERM2F128_YMM_YMM_M256_I8.html" url-ref="felixcloutier.com/x86/VPERM2F128.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f64"/>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="8" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="8" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="10" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.0" latency="10.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="4.00" TP_unrolled="4.00" uops="12">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP01" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPERM2F128" category="AVX" cpl="3" extension="AVX" iclass="VPERM2F128" iform="VPERM2F128_YMMqq_YMMqq_YMMqq_IMMb" isa-set="AVX" string="VPERM2F128 (YMM, YMM, YMM, I8)" summary="Permute Floating-Point Values" url="uops.info/html-instr/VPERM2F128_YMM_YMM_YMM_I8.html" url-ref="felixcloutier.com/x86/VPERM2F128.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="2" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="2" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.0" latency="3.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="3.00" TP_unrolled="3.00" uops="8">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="3.00" latency="3" uops="ucode"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP2" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP01" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPERMILPD" category="AVX" cpl="3" extension="AVX" iclass="VPERMILPD" iform="VPERMILPD_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VPERMILPD (XMM, XMM, M128)" summary="Permute In-Lane of Pairs of Double-Precision Floating-Point Values" url="uops.info/html-instr/VPERMILPD_XMM_XMM_M128.html" url-ref="felixcloutier.com/x86/VPERMILPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="u64"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="7" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="7" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="7" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_ports="1.00" TP_unrolled="2.00" ports="1*FP1" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="2.00" TP_ports="1.00" TP_unrolled="2.00" ports="1*FP1" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPERMILPD" category="AVX" cpl="3" extension="AVX" iclass="VPERMILPD" iform="VPERMILPD_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VPERMILPD (XMM, XMM, XMM)" summary="Permute In-Lane of Pairs of Double-Precision Floating-Point Values" url="uops.info/html-instr/VPERMILPD_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/VPERMILPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="u64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_ports="1.00" TP_unrolled="2.00" ports="1*FP1" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="2.00" latency="3" ports="FP1/2" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="2.00" TP_ports="1.00" TP_unrolled="2.00" ports="1*FP1" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP1/2" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP1/2" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VPERMILPD" category="AVX" cpl="3" extension="AVX" iclass="VPERMILPD" iform="VPERMILPD_YMMqq_YMMqq_MEMqq" isa-set="AVX" string="VPERMILPD (YMM, YMM, M256)" summary="Permute In-Lane of Pairs of Double-Precision Floating-Point Values" url="uops.info/html-instr/VPERMILPD_YMM_YMM_M256.html" url-ref="felixcloutier.com/x86/VPERMILPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="u64"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="8" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="8" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="8" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc latency="8.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="4.00" TP_unrolled="4.00" uops="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="2.00" TP_ports="1.00" TP_unrolled="2.00" ports="1*FP1" uops="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPERMILPD" category="AVX" cpl="3" extension="AVX" iclass="VPERMILPD" iform="VPERMILPD_YMMqq_YMMqq_YMMqq" isa-set="AVX" string="VPERMILPD (YMM, YMM, YMM)" summary="Permute In-Lane of Pairs of Double-Precision Floating-Point Values" url="uops.info/html-instr/VPERMILPD_YMM_YMM_YMM.html" url-ref="felixcloutier.com/x86/VPERMILPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="256" xtype="u64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="4.00" TP_unrolled="4.00" uops="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="4.00" latency="4" ports="FP1/2" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="2.00" TP_ports="1.00" TP_unrolled="2.00" ports="1*FP1" uops="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP1/2" uops="2"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP1/2" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VPERMILPD" category="AVX" cpl="3" extension="AVX" iclass="VPERMILPD" iform="VPERMILPD_XMMdq_MEMdq_IMMb" isa-set="AVX" string="VPERMILPD (XMM, M128, I8)" summary="Permute In-Lane of Pairs of Double-Precision Floating-Point Values" url="uops.info/html-instr/VPERMILPD_XMM_M128_I8.html" url-ref="felixcloutier.com/x86/VPERMILPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f64"/>
      <operand idx="3" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="7" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="7" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="7" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPERMILPD" category="AVX" cpl="3" extension="AVX" iclass="VPERMILPD" iform="VPERMILPD_XMMdq_XMMdq_IMMb" isa-set="AVX" string="VPERMILPD (XMM, XMM, I8)" summary="Permute In-Lane of Pairs of Double-Precision Floating-Point Values" url="uops.info/html-instr/VPERMILPD_XMM_XMM_I8.html" url-ref="felixcloutier.com/x86/VPERMILPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
        <doc latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP1/2" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP1/2" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP1/2" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VPERMILPD" category="AVX" cpl="3" extension="AVX" iclass="VPERMILPD" iform="VPERMILPD_YMMqq_MEMqq_IMMb" isa-set="AVX" string="VPERMILPD (YMM, M256, I8)" summary="Permute In-Lane of Pairs of Double-Precision Floating-Point Values" url="uops.info/html-instr/VPERMILPD_YMM_M256_I8.html" url-ref="felixcloutier.com/x86/VPERMILPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f64"/>
      <operand idx="3" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="8" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="8" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="8" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc latency="8.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPERMILPD" category="AVX" cpl="3" extension="AVX" iclass="VPERMILPD" iform="VPERMILPD_YMMqq_YMMqq_IMMb" isa-set="AVX" string="VPERMILPD (YMM, YMM, I8)" summary="Permute In-Lane of Pairs of Double-Precision Floating-Point Values" url="uops.info/html-instr/VPERMILPD_YMM_YMM_I8.html" url-ref="felixcloutier.com/x86/VPERMILPD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f64">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
        <doc latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="FP1/2" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP1/2" uops="2"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP1/2" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VPERMILPS" category="AVX" cpl="3" extension="AVX" iclass="VPERMILPS" iform="VPERMILPS_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VPERMILPS (XMM, XMM, M128)" summary="Permute In-Lane of Quadruples of Single-Precision Floating-Point Values" url="uops.info/html-instr/VPERMILPS_XMM_XMM_M128.html" url-ref="felixcloutier.com/x86/VPERMILPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="u32"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="7" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="7" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="7" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_ports="1.00" TP_unrolled="2.00" ports="1*FP1" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="2.00" TP_ports="1.00" TP_unrolled="2.00" ports="1*FP1" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPERMILPS" category="AVX" cpl="3" extension="AVX" iclass="VPERMILPS" iform="VPERMILPS_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VPERMILPS (XMM, XMM, XMM)" summary="Permute In-Lane of Quadruples of Single-Precision Floating-Point Values" url="uops.info/html-instr/VPERMILPS_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/VPERMILPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="u32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_ports="1.00" TP_unrolled="2.00" ports="1*FP1" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="2.00" latency="3" ports="FP1/2" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="2.00" TP_ports="1.00" TP_unrolled="2.00" ports="1*FP1" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP1/2" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP1/2" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VPERMILPS" category="AVX" cpl="3" extension="AVX" iclass="VPERMILPS" iform="VPERMILPS_YMMqq_YMMqq_MEMqq" isa-set="AVX" string="VPERMILPS (YMM, YMM, M256)" summary="Permute In-Lane of Quadruples of Single-Precision Floating-Point Values" url="uops.info/html-instr/VPERMILPS_YMM_YMM_M256.html" url-ref="felixcloutier.com/x86/VPERMILPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="u32"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="8" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="8" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="8" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc latency="8.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="4.00" TP_unrolled="4.00" uops="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="2.00" TP_ports="1.00" TP_unrolled="2.00" ports="1*FP1" uops="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="12" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="13" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPERMILPS" category="AVX" cpl="3" extension="AVX" iclass="VPERMILPS" iform="VPERMILPS_YMMqq_YMMqq_YMMqq" isa-set="AVX" string="VPERMILPS (YMM, YMM, YMM)" summary="Permute In-Lane of Quadruples of Single-Precision Floating-Point Values" url="uops.info/html-instr/VPERMILPS_YMM_YMM_YMM.html" url-ref="felixcloutier.com/x86/VPERMILPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="256" xtype="u32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="4.00" TP_unrolled="4.00" uops="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="4.00" latency="4" ports="FP1/2" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="2.00" TP_ports="1.00" TP_unrolled="2.00" ports="1*FP1" uops="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP1/2" uops="2"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP1/2" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VPERMILPS" category="AVX" cpl="3" extension="AVX" iclass="VPERMILPS" iform="VPERMILPS_XMMdq_MEMdq_IMMb" isa-set="AVX" string="VPERMILPS (XMM, M128, I8)" summary="Permute In-Lane of Quadruples of Single-Precision Floating-Point Values" url="uops.info/html-instr/VPERMILPS_XMM_M128_I8.html" url-ref="felixcloutier.com/x86/VPERMILPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="f32"/>
      <operand idx="3" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="7" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="7" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="7" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*FP12" uops="1" uops_indexed="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPERMILPS" category="AVX" cpl="3" extension="AVX" iclass="VPERMILPS" iform="VPERMILPS_XMMdq_XMMdq_IMMb" isa-set="AVX" string="VPERMILPS (XMM, XMM, I8)" summary="Permute In-Lane of Quadruples of Single-Precision Floating-Point Values" url="uops.info/html-instr/VPERMILPS_XMM_XMM_I8.html" url-ref="felixcloutier.com/x86/VPERMILPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="f32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
        <doc latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP1/2" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP1/2" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP1/2" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VPERMILPS" category="AVX" cpl="3" extension="AVX" iclass="VPERMILPS" iform="VPERMILPS_YMMqq_MEMqq_IMMb" isa-set="AVX" string="VPERMILPS (YMM, M256, I8)" summary="Permute In-Lane of Quadruples of Single-Precision Floating-Point Values" url="uops.info/html-instr/VPERMILPS_YMM_M256_I8.html" url-ref="felixcloutier.com/x86/VPERMILPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" memory-prefix="ymmword ptr" name="MEM0" r="1" type="mem" width="256" xtype="f32"/>
      <operand idx="3" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="8" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="8" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="8" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.0" latency="8.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPERMILPS" category="AVX" cpl="3" extension="AVX" iclass="VPERMILPS" iform="VPERMILPS_YMMqq_YMMqq_IMMb" isa-set="AVX" string="VPERMILPS (YMM, YMM, I8)" summary="Permute In-Lane of Quadruples of Single-Precision Floating-Point Values" url="uops.info/html-instr/VPERMILPS_YMM_YMM_I8.html" url-ref="felixcloutier.com/x86/VPERMILPS.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="256" xtype="f32">YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,YMM8,YMM9,YMM10,YMM11,YMM12,YMM13,YMM14,YMM15</operand>
      <operand idx="3" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="1" ports="1*p5" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p5" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p5" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
        <doc latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p5" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="1" ports="FP1/2" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP1/2" uops="2"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP1/2" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VPEXTRB" category="AVX" cpl="3" extension="AVX" iclass="VPEXTRB" iform="VPEXTRB_MEMb_XMMdq_IMMb" isa-set="AVX" string="VPEXTRB (M8, XMM, I8)" summary="Extract Byte/Dword/Qword" url="uops.info/html-instr/VPEXTRB_M8_XMM_I8.html" url-ref="felixcloutier.com/x86/PEXTRB:PEXTRD:PEXTRQ.html" vex="1">
      <operand idx="1" memory-prefix="byte ptr" name="MEM0" type="mem" w="1" width="8" xtype="u8"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p15+1*p23+1*p4" ports_indexed="1*p15+1*p23+1*p4" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p15+1*p23+1*p4" ports_indexed="1*p15+1*p23+1*p4" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p15+1*p23+1*p4" ports_indexed="1*p15+1*p23+1*p4" uops="3" uops_indexed="3" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p15+1*p23+1*p4" ports_indexed="1*p15+1*p23+1*p4" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="18" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p15+1*p23+1*p4" ports_indexed="1*p15+1*p23+1*p4" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p15+1*p23+1*p4" ports_indexed="1*p15+1*p23+1*p4" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p15+1*p23+1*p4" ports_indexed="1*p15+1*p23+1*p4" uops="3" uops_indexed="3" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p15+1*p23+1*p4" ports_indexed="1*p15+1*p23+1*p4" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="17" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="0.88" TP_indexed="0.80" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="2">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="0.88" TP_indexed="0.80" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="2">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="18" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="0.85" TP_indexed="0.76" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="15" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="0.85" TP_indexed="0.76" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="15" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="13" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="15" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="15" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="15" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p15+1*p49+1*p78" uops="3" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="20" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p15+1*p49+1*p78" uops="3" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="20" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p15+1*p49+1*p78" uops="3" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="20" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="18" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="17" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP12" uops="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="27" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPEXTRB" category="AVX" cpl="3" extension="AVX" iclass="VPEXTRB" iform="VPEXTRB_GPR32d_XMMdq_IMMb" isa-set="AVX" string="VPEXTRB (R32, XMM, I8)" summary="Extract Byte/Dword/Qword" url="uops.info/html-instr/VPEXTRB_R32_XMM_I8.html" url-ref="felixcloutier.com/x86/PEXTRB:PEXTRD:PEXTRQ.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="32" xtype="i32">EAX,ECX,EDX,EBX,ESP,EBP,ESI,EDI,R8D,R9D,R10D,R11D,R12D,R13D,R14D,R15D</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p0+1*p15" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p15" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p15" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p15" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p0+1*p15" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p15" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p15" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p15" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="2" ports="1*p1+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.96" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.96" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.96" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.96" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p15" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p15" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p15" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="6" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="2" ports="FP1/2, FP2" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="6" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="2" ports="FP1/2, FP2" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP12" uops="2">
          <latency cycles="6" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="2" ports="FP1/2, FP2" uops="2"/>
      </architecture>
    </instruction>
    <instruction asm="VPEXTRD" category="AVX" cpl="3" extension="AVX" iclass="VPEXTRD" iform="VPEXTRD_MEMd_XMMdq_IMMb" isa-set="AVX" string="VPEXTRD (M32, XMM, I8)" summary="Extract Byte/Dword/Qword" url="uops.info/html-instr/VPEXTRD_M32_XMM_I8.html" url-ref="felixcloutier.com/x86/PEXTRB:PEXTRD:PEXTRQ.html" vex="1">
      <operand idx="1" memory-prefix="dword ptr" name="MEM0" type="mem" w="1" width="32" xtype="i32"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="128" xtype="u32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p15+1*p23+1*p4" ports_indexed="1*p15+1*p23+1*p4" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p15+1*p23+1*p4" ports_indexed="1*p15+1*p23+1*p4" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p15+1*p23+1*p4" ports_indexed="1*p15+1*p23+1*p4" uops="3" uops_indexed="3" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="0" available_simple_decoders_indexed="0" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p15+1*p23+1*p4" ports_indexed="1*p0+1*p15+1*p23+1*p4" uops="4" uops_MITE="3" uops_MITE_indexed="3" uops_MS="0" uops_MS_indexed="0" uops_indexed="4" uops_retire_slots="3" uops_retire_slots_indexed="4">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p15+1*p23+1*p4" ports_indexed="1*p15+1*p23+1*p4" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p15+1*p23+1*p4" ports_indexed="1*p15+1*p23+1*p4" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p15+1*p23+1*p4" ports_indexed="1*p15+1*p23+1*p4" uops="3" uops_indexed="3" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p15+1*p23+1*p4" ports_indexed="1*p15+1*p23+1*p4" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="6" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="0.88" TP_indexed="0.80" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="6" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="0.88" TP_indexed="0.80" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="6" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="0.85" TP_indexed="0.76" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="2">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="0.85" TP_indexed="0.76" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="2">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="2">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="2">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="2">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="2">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p15+1*p49+1*p78" uops="3" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p15+1*p49+1*p78" uops="3" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p15+1*p49+1*p78" uops="3" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="8" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="8" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP12" uops="2">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="9" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPEXTRD" category="AVX" cpl="3" extension="AVX" iclass="VPEXTRD" iform="VPEXTRD_GPR32d_XMMdq_IMMb" isa-set="AVX" string="VPEXTRD (R32, XMM, I8)" summary="Extract Byte/Dword/Qword" url="uops.info/html-instr/VPEXTRD_R32_XMM_I8.html" url-ref="felixcloutier.com/x86/PEXTRB:PEXTRD:PEXTRQ.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="32" xtype="i32">EAX,ECX,EDX,EBX,ESP,EBP,ESI,EDI,R8D,R9D,R10D,R11D,R12D,R13D,R14D,R15D</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p0+1*p15" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p15" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p15" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p15" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p0+1*p15" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p15" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p15" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p15" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="2" ports="1*p1+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.96" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.96" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.96" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.96" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p15" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p15" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p15" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="6" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="2" ports="FP1/2, FP2" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="6" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="2" ports="FP1/2, FP2" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP12" uops="2">
          <latency cycles="6" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="2" ports="FP1/2, FP2" uops="2"/>
      </architecture>
    </instruction>
    <instruction asm="VPEXTRQ" category="AVX" cpl="3" extension="AVX" iclass="VPEXTRQ" iform="VPEXTRQ_MEMq_XMMdq_IMMb" isa-set="AVX" string="VPEXTRQ (M64, XMM, I8)" summary="Extract Byte/Dword/Qword" url="uops.info/html-instr/VPEXTRQ_M64_XMM_I8.html" url-ref="felixcloutier.com/x86/PEXTRB:PEXTRD:PEXTRQ.html" vex="1">
      <operand idx="1" memory-prefix="qword ptr" name="MEM0" type="mem" w="1" width="64" xtype="i64"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="128" xtype="u64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p0+1*p15+1*p23+1*p4" ports_indexed="1*p0+1*p15+1*p23+1*p4" uops="4" uops_indexed="4" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p15+1*p23+1*p4" ports_indexed="1*p0+1*p15+1*p23+1*p4" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p15+1*p23+1*p4" ports_indexed="1*p0+1*p15+1*p23+1*p4" uops="4" uops_indexed="4" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="0" available_simple_decoders_indexed="0" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p15+1*p23+1*p4" ports_indexed="1*p0+1*p15+1*p23+1*p4" uops="4" uops_MITE="3" uops_MITE_indexed="3" uops_MS="0" uops_MS_indexed="0" uops_indexed="4" uops_retire_slots="3" uops_retire_slots_indexed="4">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="7" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p0+1*p15+1*p23+1*p4" ports_indexed="1*p0+1*p15+1*p23+1*p4" uops="4" uops_indexed="4" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p15+1*p23+1*p4" ports_indexed="1*p0+1*p15+1*p23+1*p4" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p15+1*p23+1*p4" ports_indexed="1*p0+1*p15+1*p23+1*p4" uops="4" uops_indexed="4" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p15+1*p23+1*p4" ports_indexed="1*p15+1*p23+1*p4" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="6" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="0.88" TP_indexed="0.80" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="6" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="0.88" TP_indexed="0.80" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="6" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="0.85" TP_indexed="0.76" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="2">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="0.85" TP_indexed="0.76" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="2">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="2">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="2">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="2">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="2">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p15+1*p49+1*p78" uops="3" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p15+1*p49+1*p78" uops="3" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p15+1*p49+1*p78" uops="3" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="5" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="8" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="8" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP12" uops="2">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="9" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPEXTRQ" category="AVX" cpl="3" extension="AVX" iclass="VPEXTRQ" iform="VPEXTRQ_GPR64q_XMMdq_IMMb" isa-set="AVX" string="VPEXTRQ (R64, XMM, I8)" summary="Extract Byte/Dword/Qword" url="uops.info/html-instr/VPEXTRQ_R64_XMM_I8.html" url-ref="felixcloutier.com/x86/PEXTRB:PEXTRD:PEXTRQ.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="64" xtype="i64">RAX,RCX,RDX,RBX,RSP,RBP,RSI,RDI,R8,R9,R10,R11,R12,R13,R14,R15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p0+1*p15" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p15" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p15" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p15" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p0+1*p15" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p15" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p15" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p15" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="2" ports="1*p1+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.96" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.96" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.96" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.96" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p15" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p15" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p15" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="6" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="2" ports="FP1/2, FP2" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="6" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="2" ports="FP1/2, FP2" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP12" uops="2">
          <latency cycles="6" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="2" ports="FP1/2, FP2" uops="2"/>
      </architecture>
    </instruction>
    <instruction asm="VPEXTRW" category="AVX" cpl="3" extension="AVX" iclass="VPEXTRW" iform="VPEXTRW_MEMw_XMMdq_IMMb" isa-set="AVX" string="VPEXTRW (M16, XMM, I8)" summary="Extract Word" url="uops.info/html-instr/VPEXTRW_M16_XMM_I8.html" url-ref="felixcloutier.com/x86/PEXTRW.html" vex="1">
      <operand idx="1" memory-prefix="word ptr" name="MEM0" type="mem" w="1" width="16" xtype="i16"/>
      <operand idx="2" name="REG0" r="1" type="reg" width="128" xtype="u16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p15+1*p23+1*p4" ports_indexed="1*p15+1*p23+1*p4" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p15+1*p23+1*p4" ports_indexed="1*p15+1*p23+1*p4" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p15+1*p23+1*p4" ports_indexed="1*p15+1*p23+1*p4" uops="3" uops_indexed="3" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p15+1*p23+1*p4" ports_indexed="1*p15+1*p23+1*p4" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="18" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p15+1*p23+1*p4" ports_indexed="1*p15+1*p23+1*p4" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p15+1*p23+1*p4" ports_indexed="1*p15+1*p23+1*p4" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p15+1*p23+1*p4" ports_indexed="1*p15+1*p23+1*p4" uops="3" uops_indexed="3" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p15+1*p23+1*p4" ports_indexed="1*p15+1*p23+1*p4" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="3">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="17" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="5" ports="1*p15+1*p237+1*p4" ports_indexed="1*p15+1*p23+1*p4" uops="3" uops_indexed="3" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="0.88" TP_indexed="0.80" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="2">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="16" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_indexed="3" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="0.88" TP_indexed="0.80" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="2">
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="18" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="0.85" TP_indexed="0.76" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="15" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_indexed="3" version="2.3"/>
        <IACA TP="0.85" TP_indexed="0.76" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_indexed="3" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="15" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="15" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="15" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="15" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="3" available_simple_decoders_indexed="3" complex_decoder="1" complex_decoder_indexed="1" ports="1*p237+1*p4+1*p5" ports_indexed="1*p23+1*p4+1*p5" uops="3" uops_MITE="2" uops_MITE_indexed="2" uops_MS="0" uops_MS_indexed="0" uops_indexed="3" uops_retire_slots="2" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="15" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p15+1*p49+1*p78" uops="3" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="20" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p15+1*p49+1*p78" uops="3" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="20" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p15+1*p49+1*p78" uops="3" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="20" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="18" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="17" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP12" uops="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" start_op="1" target_op="1"/>
          <latency cycles="27" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPEXTRW" category="AVX" cpl="3" extension="AVX" iclass="VPEXTRW" iform="VPEXTRW_GPR32d_XMMdq_IMMb_C5" isa-set="AVX" string="VPEXTRW (R32, XMM, I8)" summary="Extract Word" url="uops.info/html-instr/VPEXTRW_R32_XMM_I8.html" url-ref="felixcloutier.com/x86/PEXTRW.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="32" xtype="i32">EAX,ECX,EDX,EBX,ESP,EBP,ESI,EDI,R8D,R9D,R10D,R11D,R12D,R13D,R14D,R15D</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p0+1*p15" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p15" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p15" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p15" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p0+1*p15" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p15" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p15" uops="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p15" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="2" ports="1*p1+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.96" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.96" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="2" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.96" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="2.3"/>
        <IACA TP="0.96" TP_ports="1.00" ports="1*p0+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p15" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p15" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p0+1*p15" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="4" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="6" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="2" ports="FP1/2, FP2" uops="2"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_unrolled="1.00" uops="2">
          <latency cycles="6" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="2" ports="FP1/2, FP2" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="0.50" TP_unrolled="1.00" ports="1*FP12" uops="2">
          <latency cycles="6" cycles_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="2" ports="FP1/2, FP2" uops="2"/>
      </architecture>
    </instruction>
    <instruction asm="VPHADDD" category="AVX" cpl="3" extension="AVX" iclass="VPHADDD" iform="VPHADDD_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VPHADDD (XMM, XMM, M128)" summary="Packed Horizontal Add" url="uops.info/html-instr/VPHADDD_XMM_XMM_M128.html" url-ref="felixcloutier.com/x86/PHADDW:PHADDD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i32"/>
      <architecture name="SNB">
        <IACA TP="1.50" TP_indexed="1.50" TP_no_interiteration="1.50" TP_ports="1.50" TP_ports_indexed="1.50" fusion_occurred="1" latency="9" ports="3*p15+1*p23" ports_indexed="3*p15+1*p23" uops="4" uops_indexed="4" version="2.1"/>
        <IACA TP="1.50" TP_indexed="1.50" TP_no_interiteration="1.50" TP_ports="1.50" TP_ports_indexed="1.50" fusion_occurred="1" ports="3*p15+1*p23" ports_indexed="3*p15+1*p23" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="1.50" TP_indexed="1.50" TP_ports="1.50" TP_ports_indexed="1.50" fusion_occurred="1" ports="3*p15+1*p23" ports_indexed="3*p15+1*p23" uops="4" uops_indexed="4" version="2.3"/>
        <measurement TP_loop="1.50" TP_ports="1.50" TP_unrolled="1.50" available_simple_decoders="0" complex_decoder="1" ports="3*p15+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.50" TP_indexed="1.50" TP_no_interiteration="1.50" TP_ports="1.50" TP_ports_indexed="1.50" fusion_occurred="1" latency="9" ports="3*p15+1*p23" ports_indexed="3*p15+1*p23" uops="4" uops_indexed="4" version="2.1"/>
        <IACA TP="1.50" TP_indexed="1.50" TP_no_interiteration="1.50" TP_ports="1.50" TP_ports_indexed="1.50" fusion_occurred="1" ports="3*p15+1*p23" ports_indexed="3*p15+1*p23" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="1.50" TP_indexed="1.50" TP_ports="1.50" TP_ports_indexed="1.50" fusion_occurred="1" ports="3*p15+1*p23" ports_indexed="3*p15+1*p23" uops="4" uops_indexed="4" version="2.3"/>
        <measurement TP_loop="1.50" TP_ports="1.50" TP_unrolled="1.50" available_simple_decoders="0" complex_decoder="1" ports="3*p15+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" latency="9" ports="1*p15+1*p23+2*p5" ports_indexed="1*p15+1*p23+2*p5" uops="4" uops_indexed="4" version="2.1"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p15+1*p23+2*p5" ports_indexed="1*p15+1*p23+2*p5" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p15+1*p23+2*p5" ports_indexed="1*p15+1*p23+2*p5" uops="4" uops_indexed="4" version="2.3"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p15+1*p23+2*p5" ports_indexed="1*p15+1*p23+2*p5" uops="4" uops_indexed="4" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p15+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p15+1*p23+2*p5" ports_indexed="1*p15+1*p23+2*p5" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p15+1*p23+2*p5" ports_indexed="1*p15+1*p23+2*p5" uops="4" uops_indexed="4" version="2.3"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p15+1*p23+2*p5" ports_indexed="1*p15+1*p23+2*p5" uops="4" uops_indexed="4" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p15+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p015+1*p23+2*p5" ports_indexed="1*p015+1*p23+2*p5" uops="4" uops_indexed="4" version="2.3"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p015+1*p23+2*p5" ports_indexed="1*p015+1*p23+2*p5" uops="4" uops_indexed="4" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p015+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p015+1*p23+2*p5" ports_indexed="1*p015+1*p23+2*p5" uops="4" uops_indexed="4" version="2.3"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p015+1*p23+2*p5" ports_indexed="1*p015+1*p23+2*p5" uops="4" uops_indexed="4" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p015+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p015+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p015+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p015+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p015+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.08" TP_ports="1.00" TP_unrolled="1.07" available_simple_decoders="1" complex_decoder="1" ports="1*p015+2*p15+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.07" TP_ports="1.00" TP_unrolled="1.07" available_simple_decoders="1" complex_decoder="1" ports="1*p015+2*p15+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.08" TP_ports="1.00" TP_unrolled="1.07" available_simple_decoders="1" complex_decoder="1" ports="1*p015+2*p15+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="2.00" TP_ports="1.00" TP_unrolled="2.00" ports="1*FP0123+1*FP1+1*FP12" uops="4">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPHADDD" category="AVX" cpl="3" extension="AVX" iclass="VPHADDD" iform="VPHADDD_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VPHADDD (XMM, XMM, XMM)" summary="Packed Horizontal Add" url="uops.info/html-instr/VPHADDD_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/PHADDW:PHADDD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.50" TP_no_interiteration="1.50" TP_ports="1.50" latency="3" ports="3*p15" uops="3" version="2.1"/>
        <IACA TP="1.50" TP_no_interiteration="1.50" TP_ports="1.50" ports="3*p15" uops="3" version="2.2"/>
        <IACA TP="1.50" TP_ports="1.50" ports="3*p15" uops="3" version="2.3"/>
        <measurement TP_loop="1.50" TP_ports="1.50" TP_unrolled="1.50" available_simple_decoders="0" complex_decoder="1" ports="3*p15" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="2" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.50" TP_no_interiteration="1.50" TP_ports="1.50" latency="3" ports="3*p15" uops="3" version="2.1"/>
        <IACA TP="1.50" TP_no_interiteration="1.50" TP_ports="1.50" ports="3*p15" uops="3" version="2.2"/>
        <IACA TP="1.50" TP_ports="1.50" ports="3*p15" uops="3" version="2.3"/>
        <measurement TP_loop="1.50" TP_ports="1.50" TP_unrolled="1.50" available_simple_decoders="0" complex_decoder="1" ports="3*p15" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="2" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="3" ports="1*p15+2*p5" uops="3" version="2.1"/>
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="1*p15+2*p5" uops="3" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="1*p15+2*p5" uops="3" version="2.3"/>
        <IACA TP="1.92" TP_ports="2.00" ports="1*p15+2*p5" uops="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p15+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="1*p15+2*p5" uops="3" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="1*p15+2*p5" uops="3" version="2.3"/>
        <IACA TP="1.94" TP_ports="2.00" ports="1*p15+2*p5" uops="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p15+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="2.00" TP_ports="2.00" ports="1*p015+2*p5" uops="3" version="2.3"/>
        <IACA TP="1.92" TP_ports="2.00" ports="1*p015+2*p5" uops="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p015+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="2.00" TP_ports="2.00" ports="1*p015+2*p5" uops="3" version="2.3"/>
        <IACA TP="1.92" TP_ports="2.00" ports="1*p015+2*p5" uops="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p015+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p015+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p015+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p015+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p015+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.07" TP_ports="1.00" TP_unrolled="1.07" available_simple_decoders="2" complex_decoder="1" ports="1*p015+2*p15" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="2" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.07" TP_ports="1.00" TP_unrolled="1.07" available_simple_decoders="2" complex_decoder="1" ports="1*p015+2*p15" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="2" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.07" TP_ports="1.00" TP_unrolled="1.07" available_simple_decoders="2" complex_decoder="1" ports="1*p015+2*p15" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="2" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="2.00" latency="2" uops="ucode"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="ucode"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="2.00" TP_ports="1.00" TP_unrolled="2.00" ports="1*FP0123+1*FP1+1*FP12" uops="4">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="2" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="ucode"/>
      </architecture>
    </instruction>
    <instruction asm="VPHADDSW" category="AVX" cpl="3" extension="AVX" iclass="VPHADDSW" iform="VPHADDSW_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VPHADDSW (XMM, XMM, M128)" summary="Packed Horizontal Add and Saturate" url="uops.info/html-instr/VPHADDSW_XMM_XMM_M128.html" url-ref="felixcloutier.com/x86/PHADDSW.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i16"/>
      <architecture name="SNB">
        <IACA TP="1.50" TP_indexed="1.50" TP_no_interiteration="1.50" TP_ports="1.50" TP_ports_indexed="1.50" fusion_occurred="1" latency="9" ports="3*p15+1*p23" ports_indexed="3*p15+1*p23" uops="4" uops_indexed="4" version="2.1"/>
        <IACA TP="1.50" TP_indexed="1.50" TP_no_interiteration="1.50" TP_ports="1.50" TP_ports_indexed="1.50" fusion_occurred="1" ports="3*p15+1*p23" ports_indexed="3*p15+1*p23" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="1.50" TP_indexed="1.50" TP_ports="1.50" TP_ports_indexed="1.50" fusion_occurred="1" ports="3*p15+1*p23" ports_indexed="3*p15+1*p23" uops="4" uops_indexed="4" version="2.3"/>
        <measurement TP_loop="1.50" TP_ports="1.50" TP_unrolled="1.50" available_simple_decoders="0" complex_decoder="1" ports="3*p15+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.50" TP_indexed="1.50" TP_no_interiteration="1.50" TP_ports="1.50" TP_ports_indexed="1.50" fusion_occurred="1" latency="9" ports="3*p15+1*p23" ports_indexed="3*p15+1*p23" uops="4" uops_indexed="4" version="2.1"/>
        <IACA TP="1.50" TP_indexed="1.50" TP_no_interiteration="1.50" TP_ports="1.50" TP_ports_indexed="1.50" fusion_occurred="1" ports="3*p15+1*p23" ports_indexed="3*p15+1*p23" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="1.50" TP_indexed="1.50" TP_ports="1.50" TP_ports_indexed="1.50" fusion_occurred="1" ports="3*p15+1*p23" ports_indexed="3*p15+1*p23" uops="4" uops_indexed="4" version="2.3"/>
        <measurement TP_loop="1.50" TP_ports="1.50" TP_unrolled="1.50" available_simple_decoders="0" complex_decoder="1" ports="3*p15+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" latency="9" ports="1*p15+1*p23+2*p5" ports_indexed="1*p15+1*p23+2*p5" uops="4" uops_indexed="4" version="2.1"/>
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="1*p15+1*p23+2*p5" uops="4" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="1*p15+1*p23+2*p5" uops="4" version="2.3"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p15+1*p23+2*p5" ports_indexed="1*p15+1*p23+2*p5" uops="4" uops_indexed="4" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p15+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="1*p15+1*p23+2*p5" uops="4" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="1*p15+1*p23+2*p5" uops="4" version="2.3"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p15+1*p23+2*p5" ports_indexed="1*p15+1*p23+2*p5" uops="4" uops_indexed="4" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p15+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="2.00" TP_ports="2.00" ports="1*p01+1*p23+2*p5" uops="4" version="2.3"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p01+1*p23+2*p5" ports_indexed="1*p01+1*p23+2*p5" uops="4" uops_indexed="4" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p01+1*p23+2*p5" ports_indexed="1*p01+1*p23+2*p5" uops="4" uops_indexed="4" version="2.3"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p01+1*p23+2*p5" ports_indexed="1*p01+1*p23+2*p5" uops="4" uops_indexed="4" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.06" TP_ports="1.00" TP_unrolled="1.06" available_simple_decoders="1" complex_decoder="1" ports="1*p01+2*p15+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.05" TP_ports="1.00" TP_unrolled="1.06" available_simple_decoders="1" complex_decoder="1" ports="1*p01+2*p15+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.06" TP_ports="1.00" TP_unrolled="1.06" available_simple_decoders="1" complex_decoder="1" ports="1*p01+2*p15+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="2.00" TP_ports="1.00" TP_unrolled="2.00" ports="1*FP01+2*FP12" uops="4">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPHADDSW" category="AVX" cpl="3" extension="AVX" iclass="VPHADDSW" iform="VPHADDSW_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VPHADDSW (XMM, XMM, XMM)" summary="Packed Horizontal Add and Saturate" url="uops.info/html-instr/VPHADDSW_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/PHADDSW.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.50" TP_no_interiteration="1.50" TP_ports="1.50" latency="3" ports="3*p15" uops="3" version="2.1"/>
        <IACA TP="1.50" TP_no_interiteration="1.50" TP_ports="1.50" ports="3*p15" uops="3" version="2.2"/>
        <IACA TP="1.50" TP_ports="1.50" ports="3*p15" uops="3" version="2.3"/>
        <measurement TP_loop="1.50" TP_ports="1.50" TP_unrolled="1.50" available_simple_decoders="0" complex_decoder="1" ports="3*p15" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="2" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.50" TP_no_interiteration="1.50" TP_ports="1.50" latency="3" ports="3*p15" uops="3" version="2.1"/>
        <IACA TP="1.50" TP_no_interiteration="1.50" TP_ports="1.50" ports="3*p15" uops="3" version="2.2"/>
        <IACA TP="1.50" TP_ports="1.50" ports="3*p15" uops="3" version="2.3"/>
        <measurement TP_loop="1.50" TP_ports="1.50" TP_unrolled="1.50" available_simple_decoders="0" complex_decoder="1" ports="3*p15" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="2" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="3" ports="1*p15+2*p5" uops="3" version="2.1"/>
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="1*p15+2*p5" uops="3" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="1*p15+2*p5" uops="3" version="2.3"/>
        <IACA TP="1.92" TP_ports="2.00" ports="1*p15+2*p5" uops="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p15+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="1*p15+2*p5" uops="3" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="1*p15+2*p5" uops="3" version="2.3"/>
        <IACA TP="1.94" TP_ports="2.00" ports="1*p15+2*p5" uops="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p15+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="2.00" TP_ports="2.00" ports="1*p01+2*p5" uops="3" version="2.3"/>
        <IACA TP="1.92" TP_ports="2.00" ports="1*p01+2*p5" uops="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="2.00" TP_ports="2.00" ports="1*p01+2*p5" uops="3" version="2.3"/>
        <IACA TP="1.92" TP_ports="2.00" ports="1*p01+2*p5" uops="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.06" TP_ports="1.00" TP_unrolled="1.06" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p15" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="2" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.06" TP_ports="1.00" TP_unrolled="1.06" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p15" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="2" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.06" TP_ports="1.00" TP_unrolled="1.06" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p15" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="2" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="2" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="2.00" latency="2" uops="ucode"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="ucode"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="2.00" TP_ports="1.00" TP_unrolled="2.00" ports="1*FP0123+1*FP1+1*FP12" uops="4">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="ucode"/>
      </architecture>
    </instruction>
    <instruction asm="VPHADDW" category="AVX" cpl="3" extension="AVX" iclass="VPHADDW" iform="VPHADDW_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VPHADDW (XMM, XMM, M128)" summary="Packed Horizontal Add" url="uops.info/html-instr/VPHADDW_XMM_XMM_M128.html" url-ref="felixcloutier.com/x86/PHADDW:PHADDD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i16"/>
      <architecture name="SNB">
        <IACA TP="1.50" TP_indexed="1.50" TP_no_interiteration="1.50" TP_ports="1.50" TP_ports_indexed="1.50" fusion_occurred="1" latency="9" ports="3*p15+1*p23" ports_indexed="3*p15+1*p23" uops="4" uops_indexed="4" version="2.1"/>
        <IACA TP="1.50" TP_indexed="1.50" TP_no_interiteration="1.50" TP_ports="1.50" TP_ports_indexed="1.50" fusion_occurred="1" ports="3*p15+1*p23" ports_indexed="3*p15+1*p23" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="1.50" TP_indexed="1.50" TP_ports="1.50" TP_ports_indexed="1.50" fusion_occurred="1" ports="3*p15+1*p23" ports_indexed="3*p15+1*p23" uops="4" uops_indexed="4" version="2.3"/>
        <measurement TP_loop="1.50" TP_ports="1.50" TP_unrolled="1.50" available_simple_decoders="0" complex_decoder="1" ports="3*p15+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.50" TP_indexed="1.50" TP_no_interiteration="1.50" TP_ports="1.50" TP_ports_indexed="1.50" fusion_occurred="1" latency="9" ports="3*p15+1*p23" ports_indexed="3*p15+1*p23" uops="4" uops_indexed="4" version="2.1"/>
        <IACA TP="1.50" TP_indexed="1.50" TP_no_interiteration="1.50" TP_ports="1.50" TP_ports_indexed="1.50" fusion_occurred="1" ports="3*p15+1*p23" ports_indexed="3*p15+1*p23" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="1.50" TP_indexed="1.50" TP_ports="1.50" TP_ports_indexed="1.50" fusion_occurred="1" ports="3*p15+1*p23" ports_indexed="3*p15+1*p23" uops="4" uops_indexed="4" version="2.3"/>
        <measurement TP_loop="1.50" TP_ports="1.50" TP_unrolled="1.50" available_simple_decoders="0" complex_decoder="1" ports="3*p15+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" latency="9" ports="1*p15+1*p23+2*p5" ports_indexed="1*p15+1*p23+2*p5" uops="4" uops_indexed="4" version="2.1"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p15+1*p23+2*p5" ports_indexed="1*p15+1*p23+2*p5" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p15+1*p23+2*p5" ports_indexed="1*p15+1*p23+2*p5" uops="4" uops_indexed="4" version="2.3"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p15+1*p23+2*p5" ports_indexed="1*p15+1*p23+2*p5" uops="4" uops_indexed="4" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p15+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p15+1*p23+2*p5" ports_indexed="1*p15+1*p23+2*p5" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p15+1*p23+2*p5" ports_indexed="1*p15+1*p23+2*p5" uops="4" uops_indexed="4" version="2.3"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p15+1*p23+2*p5" ports_indexed="1*p15+1*p23+2*p5" uops="4" uops_indexed="4" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p15+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p015+1*p23+2*p5" ports_indexed="1*p015+1*p23+2*p5" uops="4" uops_indexed="4" version="2.3"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p015+1*p23+2*p5" ports_indexed="1*p015+1*p23+2*p5" uops="4" uops_indexed="4" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p015+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p015+1*p23+2*p5" ports_indexed="1*p015+1*p23+2*p5" uops="4" uops_indexed="4" version="2.3"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p015+1*p23+2*p5" ports_indexed="1*p015+1*p23+2*p5" uops="4" uops_indexed="4" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p015+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p015+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p015+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p015+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p015+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.08" TP_ports="1.00" TP_unrolled="1.07" available_simple_decoders="1" complex_decoder="1" ports="1*p015+2*p15+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.07" TP_ports="1.00" TP_unrolled="1.07" available_simple_decoders="1" complex_decoder="1" ports="1*p015+2*p15+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.08" TP_ports="1.00" TP_unrolled="1.07" available_simple_decoders="1" complex_decoder="1" ports="1*p015+2*p15+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="2.00" TP_ports="1.00" TP_unrolled="2.00" ports="1*FP0123+1*FP1+1*FP12" uops="4">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPHADDW" category="AVX" cpl="3" extension="AVX" iclass="VPHADDW" iform="VPHADDW_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VPHADDW (XMM, XMM, XMM)" summary="Packed Horizontal Add" url="uops.info/html-instr/VPHADDW_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/PHADDW:PHADDD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.50" TP_no_interiteration="1.50" TP_ports="1.50" latency="3" ports="3*p15" uops="3" version="2.1"/>
        <IACA TP="1.50" TP_no_interiteration="1.50" TP_ports="1.50" ports="3*p15" uops="3" version="2.2"/>
        <IACA TP="1.50" TP_ports="1.50" ports="3*p15" uops="3" version="2.3"/>
        <measurement TP_loop="1.50" TP_ports="1.50" TP_unrolled="1.50" available_simple_decoders="0" complex_decoder="1" ports="3*p15" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="2" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.50" TP_no_interiteration="1.50" TP_ports="1.50" latency="3" ports="3*p15" uops="3" version="2.1"/>
        <IACA TP="1.50" TP_no_interiteration="1.50" TP_ports="1.50" ports="3*p15" uops="3" version="2.2"/>
        <IACA TP="1.50" TP_ports="1.50" ports="3*p15" uops="3" version="2.3"/>
        <measurement TP_loop="1.50" TP_ports="1.50" TP_unrolled="1.50" available_simple_decoders="0" complex_decoder="1" ports="3*p15" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="2" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="3" ports="1*p15+2*p5" uops="3" version="2.1"/>
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="1*p15+2*p5" uops="3" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="1*p15+2*p5" uops="3" version="2.3"/>
        <IACA TP="1.92" TP_ports="2.00" ports="1*p15+2*p5" uops="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p15+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="1*p15+2*p5" uops="3" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="1*p15+2*p5" uops="3" version="2.3"/>
        <IACA TP="1.94" TP_ports="2.00" ports="1*p15+2*p5" uops="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p15+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="2.00" TP_ports="2.00" ports="1*p015+2*p5" uops="3" version="2.3"/>
        <IACA TP="1.92" TP_ports="2.00" ports="1*p015+2*p5" uops="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p015+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="2.00" TP_ports="2.00" ports="1*p015+2*p5" uops="3" version="2.3"/>
        <IACA TP="1.92" TP_ports="2.00" ports="1*p015+2*p5" uops="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p015+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p015+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p015+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p015+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p015+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.07" TP_ports="1.00" TP_unrolled="1.07" available_simple_decoders="2" complex_decoder="1" ports="1*p015+2*p15" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="2" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.07" TP_ports="1.00" TP_unrolled="1.07" available_simple_decoders="2" complex_decoder="1" ports="1*p015+2*p15" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="2" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.07" TP_ports="1.00" TP_unrolled="1.07" available_simple_decoders="2" complex_decoder="1" ports="1*p015+2*p15" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="2" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="2.00" latency="2" uops="ucode"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="2" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="ucode"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="2.00" TP_ports="1.00" TP_unrolled="2.00" ports="1*FP0123+1*FP1+1*FP12" uops="4">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="2" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="ucode"/>
      </architecture>
    </instruction>
    <instruction asm="VPHMINPOSUW" category="AVX" cpl="3" extension="AVX" iclass="VPHMINPOSUW" iform="VPHMINPOSUW_XMMdq_MEMdq" isa-set="AVX" string="VPHMINPOSUW (XMM, M128)" summary="Packed Horizontal Word Minimum" url="uops.info/html-instr/VPHMINPOSUW_XMM_M128.html" url-ref="felixcloutier.com/x86/PHMINPOSUW.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="u16"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="11" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.29" TP_loop_indexed="1.31" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.28" TP_unrolled_indexed="1.30" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="11" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="11" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
        <doc latency="10.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_ports="1.00" TP_unrolled="2.00" ports="1*FP1" uops="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="2.00" TP_ports="1.00" TP_unrolled="2.00" ports="1*FP1" uops="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPHMINPOSUW" category="AVX" cpl="3" extension="AVX" iclass="VPHMINPOSUW" iform="VPHMINPOSUW_XMMdq_XMMdq" isa-set="AVX" string="VPHMINPOSUW (XMM, XMM)" summary="Packed Horizontal Word Minimum" url="uops.info/html-instr/VPHMINPOSUW_XMM_XMM.html" url-ref="felixcloutier.com/x86/PHMINPOSUW.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="5" ports="1*p0" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <measurement TP_loop="1.30" TP_ports="1.00" TP_unrolled="1.31" available_simple_decoders="2" complex_decoder="1" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="5" ports="1*p0" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="5" ports="1*p0" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p0" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <IACA TP="0.99" TP_ports="1.00" ports="1*p0" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.33" TP_ports="0.33" ports="1*p015" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="4" start_op="2" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_ports="1.00" TP_unrolled="2.00" ports="1*FP1" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="2.00" latency="3" ports="FP1" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="2.00" TP_ports="1.00" TP_unrolled="2.00" ports="1*FP1" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="1.00" ports="FP1" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP01" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="3" ports="FP0/1" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VPHSUBD" category="AVX" cpl="3" extension="AVX" iclass="VPHSUBD" iform="VPHSUBD_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VPHSUBD (XMM, XMM, M128)" summary="Packed Horizontal Subtract" url="uops.info/html-instr/VPHSUBD_XMM_XMM_M128.html" url-ref="felixcloutier.com/x86/PHSUBW:PHSUBD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i32"/>
      <architecture name="SNB">
        <IACA TP="1.50" TP_indexed="1.50" TP_no_interiteration="1.50" TP_ports="1.50" TP_ports_indexed="1.50" fusion_occurred="1" latency="9" ports="3*p15+1*p23" ports_indexed="3*p15+1*p23" uops="4" uops_indexed="4" version="2.1"/>
        <IACA TP="1.50" TP_no_interiteration="1.50" TP_ports="1.50" ports="3*p15+1*p23" uops="4" version="2.2"/>
        <IACA TP="1.50" TP_ports="1.50" ports="3*p15+1*p23" uops="4" version="2.3"/>
        <measurement TP_loop="1.50" TP_ports="1.50" TP_unrolled="1.50" available_simple_decoders="0" complex_decoder="1" ports="3*p15+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.50" TP_indexed="1.50" TP_no_interiteration="1.50" TP_ports="1.50" TP_ports_indexed="1.50" fusion_occurred="1" latency="9" ports="3*p15+1*p23" ports_indexed="3*p15+1*p23" uops="4" uops_indexed="4" version="2.1"/>
        <IACA TP="1.50" TP_no_interiteration="1.50" TP_ports="1.50" ports="3*p15+1*p23" uops="4" version="2.2"/>
        <IACA TP="1.50" TP_ports="1.50" ports="3*p15+1*p23" uops="4" version="2.3"/>
        <measurement TP_loop="1.50" TP_ports="1.50" TP_unrolled="1.50" available_simple_decoders="0" complex_decoder="1" ports="3*p15+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" latency="9" ports="1*p15+1*p23+2*p5" ports_indexed="1*p15+1*p23+2*p5" uops="4" uops_indexed="4" version="2.1"/>
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="1*p15+1*p23+2*p5" uops="4" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="1*p15+1*p23+2*p5" uops="4" version="2.3"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p15+1*p23+2*p5" ports_indexed="1*p15+1*p23+2*p5" uops="4" uops_indexed="4" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p15+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="1*p15+1*p23+2*p5" uops="4" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="1*p15+1*p23+2*p5" uops="4" version="2.3"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p15+1*p23+2*p5" ports_indexed="1*p15+1*p23+2*p5" uops="4" uops_indexed="4" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p15+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="2.00" TP_ports="2.00" ports="1*p015+1*p23+2*p5" uops="4" version="2.3"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p015+1*p23+2*p5" ports_indexed="1*p015+1*p23+2*p5" uops="4" uops_indexed="4" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p015+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="2.00" TP_ports="2.00" ports="1*p015+1*p23+2*p5" uops="4" version="2.3"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p015+1*p23+2*p5" ports_indexed="1*p015+1*p23+2*p5" uops="4" uops_indexed="4" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p015+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p015+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p015+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p015+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p015+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.08" TP_ports="1.00" TP_unrolled="1.07" available_simple_decoders="1" complex_decoder="1" ports="1*p015+2*p15+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.07" TP_ports="1.00" TP_unrolled="1.07" available_simple_decoders="1" complex_decoder="1" ports="1*p015+2*p15+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.08" TP_ports="1.00" TP_unrolled="1.07" available_simple_decoders="1" complex_decoder="1" ports="1*p015+2*p15+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="2.00" TP_ports="1.00" TP_unrolled="2.00" ports="1*FP0123+1*FP1+1*FP12" uops="4">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPHSUBD" category="AVX" cpl="3" extension="AVX" iclass="VPHSUBD" iform="VPHSUBD_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VPHSUBD (XMM, XMM, XMM)" summary="Packed Horizontal Subtract" url="uops.info/html-instr/VPHSUBD_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/PHSUBW:PHSUBD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.50" TP_no_interiteration="1.50" TP_ports="1.50" latency="3" ports="3*p15" uops="3" version="2.1"/>
        <IACA TP="1.50" TP_no_interiteration="1.50" TP_ports="1.50" ports="3*p15" uops="3" version="2.2"/>
        <IACA TP="1.50" TP_ports="1.50" ports="3*p15" uops="3" version="2.3"/>
        <measurement TP_loop="1.50" TP_ports="1.50" TP_unrolled="1.50" available_simple_decoders="0" complex_decoder="1" ports="3*p15" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="2" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.50" TP_no_interiteration="1.50" TP_ports="1.50" latency="3" ports="3*p15" uops="3" version="2.1"/>
        <IACA TP="1.50" TP_no_interiteration="1.50" TP_ports="1.50" ports="3*p15" uops="3" version="2.2"/>
        <IACA TP="1.50" TP_ports="1.50" ports="3*p15" uops="3" version="2.3"/>
        <measurement TP_loop="1.50" TP_ports="1.50" TP_unrolled="1.50" available_simple_decoders="0" complex_decoder="1" ports="3*p15" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="2" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="3" ports="1*p15+2*p5" uops="3" version="2.1"/>
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="1*p15+2*p5" uops="3" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="1*p15+2*p5" uops="3" version="2.3"/>
        <IACA TP="1.92" TP_ports="2.00" ports="1*p15+2*p5" uops="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p15+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="1*p15+2*p5" uops="3" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="1*p15+2*p5" uops="3" version="2.3"/>
        <IACA TP="1.94" TP_ports="2.00" ports="1*p15+2*p5" uops="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p15+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="2.00" TP_ports="2.00" ports="1*p015+2*p5" uops="3" version="2.3"/>
        <IACA TP="1.92" TP_ports="2.00" ports="1*p015+2*p5" uops="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p015+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="2.00" TP_ports="2.00" ports="1*p015+2*p5" uops="3" version="2.3"/>
        <IACA TP="1.92" TP_ports="2.00" ports="1*p015+2*p5" uops="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p015+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p015+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p015+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p015+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p015+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.07" TP_ports="1.00" TP_unrolled="1.07" available_simple_decoders="2" complex_decoder="1" ports="1*p015+2*p15" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="2" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.07" TP_ports="1.00" TP_unrolled="1.07" available_simple_decoders="2" complex_decoder="1" ports="1*p015+2*p15" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="2" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.07" TP_ports="1.00" TP_unrolled="1.07" available_simple_decoders="2" complex_decoder="1" ports="1*p015+2*p15" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="2" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="2.00" latency="2" uops="ucode"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="ucode"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="2.00" TP_ports="1.00" TP_unrolled="2.00" ports="1*FP0123+1*FP1+1*FP12" uops="4">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="2" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="ucode"/>
      </architecture>
    </instruction>
    <instruction asm="VPHSUBSW" category="AVX" cpl="3" extension="AVX" iclass="VPHSUBSW" iform="VPHSUBSW_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VPHSUBSW (XMM, XMM, M128)" summary="Packed Horizontal Subtract and Saturate" url="uops.info/html-instr/VPHSUBSW_XMM_XMM_M128.html" url-ref="felixcloutier.com/x86/PHSUBSW.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i16"/>
      <architecture name="SNB">
        <IACA TP="1.50" TP_indexed="1.50" TP_no_interiteration="1.50" TP_ports="1.50" TP_ports_indexed="1.50" fusion_occurred="1" latency="9" ports="3*p15+1*p23" ports_indexed="3*p15+1*p23" uops="4" uops_indexed="4" version="2.1"/>
        <IACA TP="1.50" TP_indexed="1.50" TP_no_interiteration="1.50" TP_ports="1.50" TP_ports_indexed="1.50" fusion_occurred="1" ports="3*p15+1*p23" ports_indexed="3*p15+1*p23" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="1.50" TP_indexed="1.50" TP_ports="1.50" TP_ports_indexed="1.50" fusion_occurred="1" ports="3*p15+1*p23" ports_indexed="3*p15+1*p23" uops="4" uops_indexed="4" version="2.3"/>
        <measurement TP_loop="1.50" TP_ports="1.50" TP_unrolled="1.50" available_simple_decoders="0" complex_decoder="1" ports="3*p15+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.50" TP_indexed="1.50" TP_no_interiteration="1.50" TP_ports="1.50" TP_ports_indexed="1.50" fusion_occurred="1" latency="9" ports="3*p15+1*p23" ports_indexed="3*p15+1*p23" uops="4" uops_indexed="4" version="2.1"/>
        <IACA TP="1.50" TP_indexed="1.50" TP_no_interiteration="1.50" TP_ports="1.50" TP_ports_indexed="1.50" fusion_occurred="1" ports="3*p15+1*p23" ports_indexed="3*p15+1*p23" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="1.50" TP_indexed="1.50" TP_ports="1.50" TP_ports_indexed="1.50" fusion_occurred="1" ports="3*p15+1*p23" ports_indexed="3*p15+1*p23" uops="4" uops_indexed="4" version="2.3"/>
        <measurement TP_loop="1.50" TP_ports="1.50" TP_unrolled="1.50" available_simple_decoders="0" complex_decoder="1" ports="3*p15+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" latency="9" ports="1*p15+1*p23+2*p5" ports_indexed="1*p15+1*p23+2*p5" uops="4" uops_indexed="4" version="2.1"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p15+1*p23+2*p5" ports_indexed="1*p15+1*p23+2*p5" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p15+1*p23+2*p5" ports_indexed="1*p15+1*p23+2*p5" uops="4" uops_indexed="4" version="2.3"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p15+1*p23+2*p5" ports_indexed="1*p15+1*p23+2*p5" uops="4" uops_indexed="4" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p15+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p15+1*p23+2*p5" ports_indexed="1*p15+1*p23+2*p5" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p15+1*p23+2*p5" ports_indexed="1*p15+1*p23+2*p5" uops="4" uops_indexed="4" version="2.3"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p15+1*p23+2*p5" ports_indexed="1*p15+1*p23+2*p5" uops="4" uops_indexed="4" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p15+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p01+1*p23+2*p5" ports_indexed="1*p01+1*p23+2*p5" uops="4" uops_indexed="4" version="2.3"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p01+1*p23+2*p5" ports_indexed="1*p01+1*p23+2*p5" uops="4" uops_indexed="4" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p01+1*p23+2*p5" ports_indexed="1*p01+1*p23+2*p5" uops="4" uops_indexed="4" version="2.3"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p01+1*p23+2*p5" ports_indexed="1*p01+1*p23+2*p5" uops="4" uops_indexed="4" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p01+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.06" TP_ports="1.00" TP_unrolled="1.06" available_simple_decoders="1" complex_decoder="1" ports="1*p01+2*p15+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.05" TP_ports="1.00" TP_unrolled="1.06" available_simple_decoders="1" complex_decoder="1" ports="1*p01+2*p15+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.06" TP_ports="1.00" TP_unrolled="1.06" available_simple_decoders="1" complex_decoder="1" ports="1*p01+2*p15+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="2.00" TP_ports="1.00" TP_unrolled="2.00" ports="1*FP01+2*FP12" uops="4">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPHSUBSW" category="AVX" cpl="3" extension="AVX" iclass="VPHSUBSW" iform="VPHSUBSW_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VPHSUBSW (XMM, XMM, XMM)" summary="Packed Horizontal Subtract and Saturate" url="uops.info/html-instr/VPHSUBSW_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/PHSUBSW.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.50" TP_no_interiteration="1.50" TP_ports="1.50" latency="3" ports="3*p15" uops="3" version="2.1"/>
        <IACA TP="1.50" TP_no_interiteration="1.50" TP_ports="1.50" ports="3*p15" uops="3" version="2.2"/>
        <IACA TP="1.50" TP_ports="1.50" ports="3*p15" uops="3" version="2.3"/>
        <measurement TP_loop="1.50" TP_ports="1.50" TP_unrolled="1.50" available_simple_decoders="0" complex_decoder="1" ports="3*p15" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="2" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.50" TP_no_interiteration="1.50" TP_ports="1.50" latency="3" ports="3*p15" uops="3" version="2.1"/>
        <IACA TP="1.50" TP_no_interiteration="1.50" TP_ports="1.50" ports="3*p15" uops="3" version="2.2"/>
        <IACA TP="1.50" TP_ports="1.50" ports="3*p15" uops="3" version="2.3"/>
        <measurement TP_loop="1.50" TP_ports="1.50" TP_unrolled="1.50" available_simple_decoders="0" complex_decoder="1" ports="3*p15" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="2" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="3" ports="1*p15+2*p5" uops="3" version="2.1"/>
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="1*p15+2*p5" uops="3" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="1*p15+2*p5" uops="3" version="2.3"/>
        <IACA TP="1.92" TP_ports="2.00" ports="1*p15+2*p5" uops="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p15+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="1*p15+2*p5" uops="3" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="1*p15+2*p5" uops="3" version="2.3"/>
        <IACA TP="1.94" TP_ports="2.00" ports="1*p15+2*p5" uops="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p15+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="2.00" TP_ports="2.00" ports="1*p01+2*p5" uops="3" version="2.3"/>
        <IACA TP="1.92" TP_ports="2.00" ports="1*p01+2*p5" uops="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="2.00" TP_ports="2.00" ports="1*p01+2*p5" uops="3" version="2.3"/>
        <IACA TP="1.92" TP_ports="2.00" ports="1*p01+2*p5" uops="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.06" TP_ports="1.00" TP_unrolled="1.06" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p15" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="2" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.06" TP_ports="1.00" TP_unrolled="1.06" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p15" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="2" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.06" TP_ports="1.00" TP_unrolled="1.06" available_simple_decoders="2" complex_decoder="1" ports="1*p01+2*p15" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="2" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="2" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="2.00" latency="2" uops="ucode"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="ucode"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="2.00" TP_ports="1.00" TP_unrolled="2.00" ports="1*FP0123+1*FP1+1*FP12" uops="4">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc uops="ucode"/>
      </architecture>
    </instruction>
    <instruction asm="VPHSUBW" category="AVX" cpl="3" extension="AVX" iclass="VPHSUBW" iform="VPHSUBW_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VPHSUBW (XMM, XMM, M128)" summary="Packed Horizontal Subtract" url="uops.info/html-instr/VPHSUBW_XMM_XMM_M128.html" url-ref="felixcloutier.com/x86/PHSUBW:PHSUBD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i16"/>
      <architecture name="SNB">
        <IACA TP="1.50" TP_indexed="1.50" TP_no_interiteration="1.50" TP_ports="1.50" TP_ports_indexed="1.50" fusion_occurred="1" latency="9" ports="3*p15+1*p23" ports_indexed="3*p15+1*p23" uops="4" uops_indexed="4" version="2.1"/>
        <IACA TP="1.50" TP_indexed="1.50" TP_no_interiteration="1.50" TP_ports="1.50" TP_ports_indexed="1.50" fusion_occurred="1" ports="3*p15+1*p23" ports_indexed="3*p15+1*p23" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="1.50" TP_indexed="1.50" TP_ports="1.50" TP_ports_indexed="1.50" fusion_occurred="1" ports="3*p15+1*p23" ports_indexed="3*p15+1*p23" uops="4" uops_indexed="4" version="2.3"/>
        <measurement TP_loop="1.50" TP_ports="1.50" TP_unrolled="1.50" available_simple_decoders="0" complex_decoder="1" ports="3*p15+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.50" TP_indexed="1.50" TP_no_interiteration="1.50" TP_ports="1.50" TP_ports_indexed="1.50" fusion_occurred="1" latency="9" ports="3*p15+1*p23" ports_indexed="3*p15+1*p23" uops="4" uops_indexed="4" version="2.1"/>
        <IACA TP="1.50" TP_indexed="1.50" TP_no_interiteration="1.50" TP_ports="1.50" TP_ports_indexed="1.50" fusion_occurred="1" ports="3*p15+1*p23" ports_indexed="3*p15+1*p23" uops="4" uops_indexed="4" version="2.2"/>
        <IACA TP="1.50" TP_indexed="1.50" TP_ports="1.50" TP_ports_indexed="1.50" fusion_occurred="1" ports="3*p15+1*p23" ports_indexed="3*p15+1*p23" uops="4" uops_indexed="4" version="2.3"/>
        <measurement TP_loop="1.50" TP_ports="1.50" TP_unrolled="1.50" available_simple_decoders="0" complex_decoder="1" ports="3*p15+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="2.00" TP_indexed="2.00" TP_no_interiteration="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" latency="9" ports="1*p15+1*p23+2*p5" ports_indexed="1*p15+1*p23+2*p5" uops="4" uops_indexed="4" version="2.1"/>
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="1*p15+1*p23+2*p5" uops="4" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="1*p15+1*p23+2*p5" uops="4" version="2.3"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p15+1*p23+2*p5" ports_indexed="1*p15+1*p23+2*p5" uops="4" uops_indexed="4" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p15+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="1*p15+1*p23+2*p5" uops="4" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="1*p15+1*p23+2*p5" uops="4" version="2.3"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p15+1*p23+2*p5" ports_indexed="1*p15+1*p23+2*p5" uops="4" uops_indexed="4" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p15+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="2.00" TP_ports="2.00" ports="1*p015+1*p23+2*p5" uops="4" version="2.3"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p015+1*p23+2*p5" ports_indexed="1*p015+1*p23+2*p5" uops="4" uops_indexed="4" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p015+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p015+1*p23+2*p5" ports_indexed="1*p015+1*p23+2*p5" uops="4" uops_indexed="4" version="2.3"/>
        <IACA TP="2.00" TP_indexed="2.00" TP_ports="2.00" TP_ports_indexed="2.00" fusion_occurred="1" ports="1*p015+1*p23+2*p5" ports_indexed="1*p015+1*p23+2*p5" uops="4" uops_indexed="4" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p015+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p015+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p015+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p015+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="1" complex_decoder="1" ports="1*p015+1*p23+2*p5" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.08" TP_ports="1.00" TP_unrolled="1.07" available_simple_decoders="1" complex_decoder="1" ports="1*p015+2*p15+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.07" TP_ports="1.00" TP_unrolled="1.07" available_simple_decoders="1" complex_decoder="1" ports="1*p015+2*p15+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.08" TP_ports="1.00" TP_unrolled="1.07" available_simple_decoders="1" complex_decoder="1" ports="1*p015+2*p15+1*p23" uops="4" uops_MITE="4" uops_MS="0" uops_retire_slots="4">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="7" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="10" cycles_addr_index="10" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="2.00" TP_ports="1.00" TP_unrolled="2.00" ports="1*FP0123+1*FP1+1*FP12" uops="4">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPHSUBW" category="AVX" cpl="3" extension="AVX" iclass="VPHSUBW" iform="VPHSUBW_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VPHSUBW (XMM, XMM, XMM)" summary="Packed Horizontal Subtract" url="uops.info/html-instr/VPHSUBW_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/PHSUBW:PHSUBD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.50" TP_no_interiteration="1.50" TP_ports="1.50" latency="3" ports="3*p15" uops="3" version="2.1"/>
        <IACA TP="1.50" TP_no_interiteration="1.50" TP_ports="1.50" ports="3*p15" uops="3" version="2.2"/>
        <IACA TP="1.50" TP_ports="1.50" ports="3*p15" uops="3" version="2.3"/>
        <measurement TP_loop="1.50" TP_ports="1.50" TP_unrolled="1.50" available_simple_decoders="0" complex_decoder="1" ports="3*p15" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="2" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.50" TP_no_interiteration="1.50" TP_ports="1.50" latency="3" ports="3*p15" uops="3" version="2.1"/>
        <IACA TP="1.50" TP_no_interiteration="1.50" TP_ports="1.50" ports="3*p15" uops="3" version="2.2"/>
        <IACA TP="1.50" TP_ports="1.50" ports="3*p15" uops="3" version="2.3"/>
        <measurement TP_loop="1.50" TP_ports="1.50" TP_unrolled="1.50" available_simple_decoders="0" complex_decoder="1" ports="3*p15" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="2" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="3" ports="1*p15+2*p5" uops="3" version="2.1"/>
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="1*p15+2*p5" uops="3" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="1*p15+2*p5" uops="3" version="2.3"/>
        <IACA TP="1.92" TP_ports="2.00" ports="1*p15+2*p5" uops="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p15+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="1*p15+2*p5" uops="3" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="1*p15+2*p5" uops="3" version="2.3"/>
        <IACA TP="1.94" TP_ports="2.00" ports="1*p15+2*p5" uops="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="1*p15+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="2.00" TP_ports="2.00" ports="1*p015+2*p5" uops="3" version="2.3"/>
        <IACA TP="1.92" TP_ports="2.00" ports="1*p015+2*p5" uops="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p015+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="2.00" TP_ports="2.00" ports="1*p015+2*p5" uops="3" version="2.3"/>
        <IACA TP="1.92" TP_ports="2.00" ports="1*p015+2*p5" uops="3" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p015+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p015+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p015+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p015+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="1*p015+2*p5" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.07" TP_ports="1.00" TP_unrolled="1.07" available_simple_decoders="2" complex_decoder="1" ports="1*p015+2*p15" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="2" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.07" TP_ports="1.00" TP_unrolled="1.07" available_simple_decoders="2" complex_decoder="1" ports="1*p015+2*p15" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="2" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.07" TP_ports="1.00" TP_unrolled="1.07" available_simple_decoders="2" complex_decoder="1" ports="1*p015+2*p15" uops="3" uops_MITE="3" uops_MS="0" uops_retire_slots="3">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="2" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="2.00" latency="2" uops="ucode"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="2.00" TP_unrolled="2.00" uops="4">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="2" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="2.00" latency="2" uops="ucode"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="2.00" TP_ports="1.00" TP_unrolled="2.00" ports="1*FP0123+1*FP1+1*FP12" uops="4">
          <latency cycles="2" start_op="2" target_op="1"/>
          <latency cycles="2" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="2.00" latency="2" uops="ucode"/>
      </architecture>
    </instruction>
    <instruction asm="VPINSRB" category="AVX" cpl="3" extension="AVX" iclass="VPINSRB" iform="VPINSRB_XMMdq_XMMdq_MEMb_IMMb" isa-set="AVX" string="VPINSRB (XMM, XMM, M8, I8)" summary="Insert Byte/Dword/Qword" url="uops.info/html-instr/VPINSRB_XMM_XMM_M8_I8.html" url-ref="felixcloutier.com/x86/PINSRB:PINSRD:PINSRQ.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="byte ptr" name="MEM0" r="1" type="mem" width="8" xtype="u8"/>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="7" ports="1*p15+1*p23" uops="2" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p15+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="7" ports="1*p15+1*p23" uops="2" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p15+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="7" ports="1*p23+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p15+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p15+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p15+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPINSRB" category="AVX" cpl="3" extension="AVX" iclass="VPINSRB" iform="VPINSRB_XMMdq_XMMdq_GPR32d_IMMb" isa-set="AVX" string="VPINSRB (XMM, XMM, R32, I8)" summary="Insert Byte/Dword/Qword" url="uops.info/html-instr/VPINSRB_XMM_XMM_R32_I8.html" url-ref="felixcloutier.com/x86/PINSRB:PINSRD:PINSRQ.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="32" xtype="u8">EAX,ECX,EDX,EBX,ESP,EBP,ESI,EDI,R8D,R9D,R10D,R11D,R12D,R13D,R14D,R15D</operand>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="2" ports="1*p15+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p15+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p15+1*p5" uops="2" version="2.3"/>
        <measurement TP_loop="1.07" TP_ports="1.00" TP_unrolled="1.07" available_simple_decoders="2" complex_decoder="1" ports="1*p15+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="2" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="2" ports="1*p15+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p15+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p15+1*p5" uops="2" version="2.3"/>
        <measurement TP_loop="1.07" TP_ports="1.00" TP_unrolled="1.06" available_simple_decoders="2" complex_decoder="1" ports="1*p15+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="2" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="2" ports="2*p5" uops="2" version="2.1"/>
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="2*p5" uops="2" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="2*p5" uops="2" version="2.3"/>
        <IACA TP="1.94" TP_ports="2.00" ports="2*p5" uops="2" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="2*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="2" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="2*p5" uops="2" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="2*p5" uops="2" version="2.3"/>
        <IACA TP="1.95" TP_ports="2.00" ports="2*p5" uops="2" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="2*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="2" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="2.00" TP_ports="2.00" ports="2*p5" uops="2" version="2.3"/>
        <IACA TP="1.94" TP_ports="2.00" ports="2*p5" uops="2" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="3" complex_decoder="1" ports="2*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="2.00" TP_ports="2.00" ports="2*p5" uops="2" version="2.3"/>
        <IACA TP="1.94" TP_ports="2.00" ports="2*p5" uops="2" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="3" complex_decoder="1" ports="2*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="3" complex_decoder="1" ports="2*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="3" complex_decoder="1" ports="2*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="3" complex_decoder="1" ports="2*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="3" complex_decoder="1" ports="2*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.07" TP_ports="1.00" TP_unrolled="1.08" available_simple_decoders="3" complex_decoder="1" ports="1*p15+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.07" TP_ports="1.00" TP_unrolled="1.08" available_simple_decoders="3" complex_decoder="1" ports="1*p15+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.07" TP_ports="1.00" TP_unrolled="1.08" available_simple_decoders="3" complex_decoder="1" ports="1*p15+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.25" TP_unrolled="1.23" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="6" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.20" TP_unrolled="1.20" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="6" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP1" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="6" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPINSRD" category="AVX" cpl="3" extension="AVX" iclass="VPINSRD" iform="VPINSRD_XMMdq_XMMdq_MEMd_IMMb" isa-set="AVX" string="VPINSRD (XMM, XMM, M32, I8)" summary="Insert Byte/Dword/Qword" url="uops.info/html-instr/VPINSRD_XMM_XMM_M32_I8.html" url-ref="felixcloutier.com/x86/PINSRB:PINSRD:PINSRQ.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="dword ptr" name="MEM0" r="1" type="mem" width="32" xtype="u32"/>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="7" ports="1*p15+1*p23" uops="2" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p15+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="7" ports="1*p15+1*p23" uops="2" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p15+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="7" ports="1*p23+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p15+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p15+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p15+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPINSRD" category="AVX" cpl="3" extension="AVX" iclass="VPINSRD" iform="VPINSRD_XMMdq_XMMdq_GPR32d_IMMb" isa-set="AVX" string="VPINSRD (XMM, XMM, R32, I8)" summary="Insert Byte/Dword/Qword" url="uops.info/html-instr/VPINSRD_XMM_XMM_R32_I8.html" url-ref="felixcloutier.com/x86/PINSRB:PINSRD:PINSRQ.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="32" xtype="u32">EAX,ECX,EDX,EBX,ESP,EBP,ESI,EDI,R8D,R9D,R10D,R11D,R12D,R13D,R14D,R15D</operand>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="2" ports="1*p15+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p15+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p15+1*p5" uops="2" version="2.3"/>
        <measurement TP_loop="1.07" TP_ports="1.00" TP_unrolled="1.07" available_simple_decoders="2" complex_decoder="1" ports="1*p15+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="2" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="2" ports="1*p15+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p15+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p15+1*p5" uops="2" version="2.3"/>
        <measurement TP_loop="1.07" TP_ports="1.00" TP_unrolled="1.06" available_simple_decoders="2" complex_decoder="1" ports="1*p15+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="2" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="2" ports="2*p5" uops="2" version="2.1"/>
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="2*p5" uops="2" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="2*p5" uops="2" version="2.3"/>
        <IACA TP="1.94" TP_ports="2.00" ports="2*p5" uops="2" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="2*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="2" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="2*p5" uops="2" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="2*p5" uops="2" version="2.3"/>
        <IACA TP="1.95" TP_ports="2.00" ports="2*p5" uops="2" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="2*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="2" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="2.00" TP_ports="2.00" ports="2*p5" uops="2" version="2.3"/>
        <IACA TP="1.94" TP_ports="2.00" ports="2*p5" uops="2" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="3" complex_decoder="1" ports="2*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="2.00" TP_ports="2.00" ports="2*p5" uops="2" version="2.3"/>
        <IACA TP="1.94" TP_ports="2.00" ports="2*p5" uops="2" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="3" complex_decoder="1" ports="2*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="3" complex_decoder="1" ports="2*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="3" complex_decoder="1" ports="2*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="3" complex_decoder="1" ports="2*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="3" complex_decoder="1" ports="2*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.07" TP_ports="1.00" TP_unrolled="1.08" available_simple_decoders="3" complex_decoder="1" ports="1*p15+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.07" TP_ports="1.00" TP_unrolled="1.08" available_simple_decoders="3" complex_decoder="1" ports="1*p15+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.07" TP_ports="1.00" TP_unrolled="1.08" available_simple_decoders="3" complex_decoder="1" ports="1*p15+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.23" TP_unrolled="1.23" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="6" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.20" TP_unrolled="1.20" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="6" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP1" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="6" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPINSRQ" category="AVX" cpl="3" extension="AVX" iclass="VPINSRQ" iform="VPINSRQ_XMMdq_XMMdq_MEMq_IMMb" isa-set="AVX" string="VPINSRQ (XMM, XMM, M64, I8)" summary="Insert Byte/Dword/Qword" url="uops.info/html-instr/VPINSRQ_XMM_XMM_M64_I8.html" url-ref="felixcloutier.com/x86/PINSRB:PINSRD:PINSRQ.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="qword ptr" name="MEM0" r="1" type="mem" width="64" xtype="u64"/>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="7" ports="1*p15+1*p23" uops="2" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p15+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="7" ports="1*p15+1*p23" uops="2" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p15+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="7" ports="1*p23+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p15+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p15+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p15+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPINSRQ" category="AVX" cpl="3" extension="AVX" iclass="VPINSRQ" iform="VPINSRQ_XMMdq_XMMdq_GPR64q_IMMb" isa-set="AVX" string="VPINSRQ (XMM, XMM, R64, I8)" summary="Insert Byte/Dword/Qword" url="uops.info/html-instr/VPINSRQ_XMM_XMM_R64_I8.html" url-ref="felixcloutier.com/x86/PINSRB:PINSRD:PINSRQ.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u64">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="64" xtype="u64">RAX,RCX,RDX,RBX,RSP,RBP,RSI,RDI,R8,R9,R10,R11,R12,R13,R14,R15</operand>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="2" ports="1*p15+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p15+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p15+1*p5" uops="2" version="2.3"/>
        <measurement TP_loop="1.07" TP_ports="1.00" TP_unrolled="1.07" available_simple_decoders="2" complex_decoder="1" ports="1*p15+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="2" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="2" ports="1*p15+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p15+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p15+1*p5" uops="2" version="2.3"/>
        <measurement TP_loop="1.07" TP_ports="1.00" TP_unrolled="1.06" available_simple_decoders="2" complex_decoder="1" ports="1*p15+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="2" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="2" ports="2*p5" uops="2" version="2.1"/>
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="2*p5" uops="2" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="2*p5" uops="2" version="2.3"/>
        <IACA TP="1.94" TP_ports="2.00" ports="2*p5" uops="2" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="2*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="2" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="2*p5" uops="2" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="2*p5" uops="2" version="2.3"/>
        <IACA TP="1.95" TP_ports="2.00" ports="2*p5" uops="2" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="2" complex_decoder="1" ports="2*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="2" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="2.00" TP_ports="2.00" ports="2*p5" uops="2" version="2.3"/>
        <IACA TP="1.94" TP_ports="2.00" ports="2*p5" uops="2" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="3" complex_decoder="1" ports="2*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="2.00" TP_ports="2.00" ports="2*p5" uops="2" version="2.3"/>
        <IACA TP="1.94" TP_ports="2.00" ports="2*p5" uops="2" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="3" complex_decoder="1" ports="2*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="3" complex_decoder="1" ports="2*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="3" complex_decoder="1" ports="2*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="3" complex_decoder="1" ports="2*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="3" complex_decoder="1" ports="2*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.07" TP_ports="1.00" TP_unrolled="1.08" available_simple_decoders="3" complex_decoder="1" ports="1*p15+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.07" TP_ports="1.00" TP_unrolled="1.08" available_simple_decoders="3" complex_decoder="1" ports="1*p15+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.07" TP_ports="1.00" TP_unrolled="1.08" available_simple_decoders="3" complex_decoder="1" ports="1*p15+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.25" TP_unrolled="1.23" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="6" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.20" TP_unrolled="1.20" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="6" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP1" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="6" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPINSRW" category="AVX" cpl="3" extension="AVX" iclass="VPINSRW" iform="VPINSRW_XMMdq_XMMdq_MEMw_IMMb" isa-set="AVX" string="VPINSRW (XMM, XMM, M16, I8)" summary="Insert Word" url="uops.info/html-instr/VPINSRW_XMM_XMM_M16_I8.html" url-ref="felixcloutier.com/x86/PINSRW.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="word ptr" name="MEM0" r="1" type="mem" width="16" xtype="u16"/>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="7" ports="1*p15+1*p23" uops="2" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p15+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="7" ports="1*p15+1*p23" uops="2" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p15+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="7" ports="1*p23+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="0" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p23+1*p5" ports_indexed="1*p23+1*p5" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p23+1*p5" uops="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p23+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p15+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p15+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="3" complex_decoder="1" ports="1*p15+1*p23" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP12" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPINSRW" category="AVX" cpl="3" extension="AVX" iclass="VPINSRW" iform="VPINSRW_XMMdq_XMMdq_GPR32d_IMMb" isa-set="AVX" string="VPINSRW (XMM, XMM, R32, I8)" summary="Insert Word" url="uops.info/html-instr/VPINSRW_XMM_XMM_R32_I8.html" url-ref="felixcloutier.com/x86/PINSRW.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="32" xtype="u16">EAX,ECX,EDX,EBX,ESP,EBP,ESI,EDI,R8D,R9D,R10D,R11D,R12D,R13D,R14D,R15D</operand>
      <operand idx="4" name="IMM0" r="1" type="imm" width="8"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="2" ports="1*p15+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p15+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p15+1*p5" uops="2" version="2.3"/>
        <measurement TP_loop="1.07" TP_ports="1.00" TP_unrolled="1.07" available_simple_decoders="0" complex_decoder="1" ports="1*p15+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="2" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="2" ports="1*p15+1*p5" uops="2" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p15+1*p5" uops="2" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p15+1*p5" uops="2" version="2.3"/>
        <measurement TP_loop="1.07" TP_ports="1.00" TP_unrolled="1.06" available_simple_decoders="0" complex_decoder="1" ports="1*p15+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="2" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" latency="2" ports="2*p5" uops="2" version="2.1"/>
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="2*p5" uops="2" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="2*p5" uops="2" version="2.3"/>
        <IACA TP="1.94" TP_ports="2.00" ports="2*p5" uops="2" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="2*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="2" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="2.00" TP_no_interiteration="2.00" TP_ports="2.00" ports="2*p5" uops="2" version="2.2"/>
        <IACA TP="2.00" TP_ports="2.00" ports="2*p5" uops="2" version="2.3"/>
        <IACA TP="1.95" TP_ports="2.00" ports="2*p5" uops="2" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="0" complex_decoder="1" ports="2*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="2" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="2.00" TP_ports="2.00" ports="2*p5" uops="2" version="2.3"/>
        <IACA TP="1.94" TP_ports="2.00" ports="2*p5" uops="2" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="3" complex_decoder="1" ports="2*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="2.00" TP_ports="2.00" ports="2*p5" uops="2" version="2.3"/>
        <IACA TP="1.94" TP_ports="2.00" ports="2*p5" uops="2" version="3.0"/>
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="3" complex_decoder="1" ports="2*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="3" complex_decoder="1" ports="2*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="3" complex_decoder="1" ports="2*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="3" complex_decoder="1" ports="2*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="2.00" TP_ports="2.00" TP_unrolled="2.00" available_simple_decoders="3" complex_decoder="1" ports="2*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="1.07" TP_ports="1.00" TP_unrolled="1.08" available_simple_decoders="3" complex_decoder="1" ports="1*p15+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="1.07" TP_ports="1.00" TP_unrolled="1.08" available_simple_decoders="3" complex_decoder="1" ports="1*p15+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="1.07" TP_ports="1.00" TP_unrolled="1.08" available_simple_decoders="3" complex_decoder="1" ports="1*p15+1*p5" uops="2" uops_MITE="2" uops_MS="0" uops_retire_slots="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="4" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.25" TP_unrolled="1.23" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="6" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.20" TP_unrolled="1.20" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="6" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP1" uops="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="6" cycles_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPMADDUBSW" category="AVX" cpl="3" extension="AVX" iclass="VPMADDUBSW" iform="VPMADDUBSW_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VPMADDUBSW (XMM, XMM, M128)" summary="Multiply and Add Packed Signed and Unsigned Bytes" url="uops.info/html-instr/VPMADDUBSW_XMM_XMM_M128.html" url-ref="felixcloutier.com/x86/PMADDUBSW.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i8"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="9" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="9" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="11" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.55" TP_unrolled_indexed="0.54" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.52" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.55" TP_unrolled_indexed="0.55" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP0" uops="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP0" uops="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP03" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPMADDUBSW" category="AVX" cpl="3" extension="AVX" iclass="VPMADDUBSW" iform="VPMADDUBSW_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VPMADDUBSW (XMM, XMM, XMM)" summary="Multiply and Add Packed Signed and Unsigned Bytes" url="uops.info/html-instr/VPMADDUBSW_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/PMADDUBSW.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="i8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p0" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p0" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="5" ports="1*p0" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p0" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <IACA TP="0.99" TP_ports="1.00" ports="1*p0" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP0" uops="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="4" ports="FP0" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP0" uops="1">
          <latency cycles="4" start_op="2" target_op="1"/>
          <latency cycles="4" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="4" ports="FP0" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP03" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="3" ports="FP0/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VPMADDWD" category="AVX" cpl="3" extension="AVX" iclass="VPMADDWD" iform="VPMADDWD_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VPMADDWD (XMM, XMM, M128)" summary="Multiply and Add Packed Integers" url="uops.info/html-instr/VPMADDWD_XMM_XMM_M128.html" url-ref="felixcloutier.com/x86/PMADDWD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i16"/>
      <architecture name="SNB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="9" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="9" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="1.02" TP_loop_indexed="1.02" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" latency="11" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_indexed="1.00" TP_no_interiteration="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="1.00" TP_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" fusion_occurred="1" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="1.00" TP_loop_indexed="1.00" TP_ports="1.00" TP_ports_indexed="1.00" TP_unrolled="1.00" TP_unrolled_indexed="1.00" ports="1*p0+1*p23" ports_indexed="1*p0+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p015+1*p23" ports_indexed="1*p015+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.55" TP_unrolled_indexed="0.55" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.53" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.53" TP_loop_indexed="0.53" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.53" TP_unrolled_indexed="0.52" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.55" TP_loop_indexed="0.55" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.55" TP_unrolled_indexed="0.55" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles_addr="12" cycles_addr_index="12" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP0" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP0" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="10" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP03" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles_addr="11" cycles_addr_index="11" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="11" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPMADDWD" category="AVX" cpl="3" extension="AVX" iclass="VPMADDWD" iform="VPMADDWD_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VPMADDWD (XMM, XMM, XMM)" summary="Multiply and Add Packed Integers" url="uops.info/html-instr/VPMADDWD_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/PMADDWD.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p0" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="3" ports="1*p0" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" latency="5" ports="1*p0" uops="1" version="2.1"/>
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <IACA TP="0.98" TP_ports="1.00" ports="1*p0" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="1.00" TP_no_interiteration="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.2"/>
        <IACA TP="1.00" TP_ports="1.00" ports="1*p0" uops="1" version="2.3"/>
        <IACA TP="0.99" TP_ports="1.00" ports="1*p0" uops="1" version="3.0"/>
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*p0" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.34" TP_ports="0.33" ports="1*p015" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="5" start_op="2" target_op="1"/>
          <latency cycles="5" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP0" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="3" ports="FP0" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="1.00" TP_ports="1.00" TP_unrolled="1.00" ports="1*FP0" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="1.00" latency="3" ports="FP0" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*FP03" uops="1">
          <latency cycles="3" start_op="2" target_op="1"/>
          <latency cycles="3" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="3" ports="FP0/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VPMAXSB" category="AVX" cpl="3" extension="AVX" iclass="VPMAXSB" iform="VPMAXSB_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VPMAXSB (XMM, XMM, M128)" summary="Maximum of Packed Signed Integers" url="uops.info/html-instr/VPMAXSB_XMM_XMM_M128.html" url-ref="felixcloutier.com/x86/PMAXSB:PMAXSW:PMAXSD:PMAXSQ.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i8"/>
      <architecture name="SNB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.33" TP_unrolled="0.50" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.33" TP_unrolled="0.50" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPMAXSB" category="AVX" cpl="3" extension="AVX" iclass="VPMAXSB" iform="VPMAXSB_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VPMAXSB (XMM, XMM, XMM)" summary="Maximum of Packed Signed Integers" url="uops.info/html-instr/VPMAXSB_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/PMAXSB:PMAXSW:PMAXSD:PMAXSQ.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="i8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p15" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p15" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.36" TP_ports="0.33" TP_unrolled="0.33" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/3" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/3" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.31" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FP0/1/2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VPMAXSD" category="AVX" cpl="3" extension="AVX" iclass="VPMAXSD" iform="VPMAXSD_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VPMAXSD (XMM, XMM, M128)" summary="Maximum of Packed Signed Integers" url="uops.info/html-instr/VPMAXSD_XMM_XMM_M128.html" url-ref="felixcloutier.com/x86/PMAXSB:PMAXSW:PMAXSD:PMAXSQ.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i32"/>
      <architecture name="SNB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.33" TP_unrolled="0.50" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.33" TP_unrolled="0.50" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPMAXSD" category="AVX" cpl="3" extension="AVX" iclass="VPMAXSD" iform="VPMAXSD_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VPMAXSD (XMM, XMM, XMM)" summary="Maximum of Packed Signed Integers" url="uops.info/html-instr/VPMAXSD_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/PMAXSB:PMAXSW:PMAXSD:PMAXSQ.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="i32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p15" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p15" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.36" TP_ports="0.33" TP_unrolled="0.33" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/3" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/3" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.31" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FP0/1/2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VPMAXSW" category="AVX" cpl="3" extension="AVX" iclass="VPMAXSW" iform="VPMAXSW_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VPMAXSW (XMM, XMM, M128)" summary="Maximum of Packed Signed Integers" url="uops.info/html-instr/VPMAXSW_XMM_XMM_M128.html" url-ref="felixcloutier.com/x86/PMAXSB:PMAXSW:PMAXSD:PMAXSQ.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="i16"/>
      <architecture name="SNB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.33" TP_unrolled="0.50" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.33" TP_unrolled="0.50" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPMAXSW" category="AVX" cpl="3" extension="AVX" iclass="VPMAXSW" iform="VPMAXSW_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VPMAXSW (XMM, XMM, XMM)" summary="Maximum of Packed Signed Integers" url="uops.info/html-instr/VPMAXSW_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/PMAXSB:PMAXSW:PMAXSD:PMAXSQ.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="i16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p15" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p15" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/3" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/3" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.25" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FP0/1/2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VPMAXUB" category="AVX" cpl="3" extension="AVX" iclass="VPMAXUB" iform="VPMAXUB_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VPMAXUB (XMM, XMM, M128)" summary="Maximum of Packed Unsigned Integers" url="uops.info/html-instr/VPMAXUB_XMM_XMM_M128.html" url-ref="felixcloutier.com/x86/PMAXUB:PMAXUW.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="u8"/>
      <architecture name="SNB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.33" TP_unrolled="0.50" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.33" TP_unrolled="0.50" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPMAXUB" category="AVX" cpl="3" extension="AVX" iclass="VPMAXUB" iform="VPMAXUB_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VPMAXUB (XMM, XMM, XMM)" summary="Maximum of Packed Unsigned Integers" url="uops.info/html-instr/VPMAXUB_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/PMAXUB:PMAXUW.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="u8">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p15" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p15" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/3" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/3" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.25" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FP0/1/2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VPMAXUD" category="AVX" cpl="3" extension="AVX" iclass="VPMAXUD" iform="VPMAXUD_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VPMAXUD (XMM, XMM, M128)" summary="Maximum of Packed Unsigned Integers" url="uops.info/html-instr/VPMAXUD_XMM_XMM_M128.html" url-ref="felixcloutier.com/x86/PMAXUD:PMAXUQ.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="u32"/>
      <architecture name="SNB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.1"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="1.00" TP_unrolled_indexed="1.00" available_simple_decoders="2" available_simple_decoders_indexed="2" complex_decoder="1" complex_decoder_indexed="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.2"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="7" cycles_addr_index="7" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="6" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="2.3"/>
        <IACA TP="0.50" TP_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_indexed="2" version="3.0"/>
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="7.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_loop_indexed="0.50" TP_ports="0.50" TP_ports_indexed="0.50" TP_unrolled="0.50" TP_unrolled_indexed="0.50" ports="1*p01+1*p23" ports_indexed="1*p01+1*p23" uops="2" uops_MITE="1" uops_MITE_indexed="1" uops_MS="0" uops_MS_indexed="0" uops_indexed="2" uops_retire_slots="1" uops_retire_slots_indexed="2">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="8" cycles_addr_index="8" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="5" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.50" TP_ports="0.33" TP_unrolled="0.50" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.50" TP_ports="0.33" TP_unrolled="0.50" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="8" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.50" TP_ports="0.25" TP_unrolled="0.50" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles_addr="9" cycles_addr_index="9" cycles_addr_index_is_upper_bound="1" cycles_addr_is_upper_bound="1" cycles_mem="9" cycles_mem_is_upper_bound="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
    </instruction>
    <instruction asm="VPMAXUD" category="AVX" cpl="3" extension="AVX" iclass="VPMAXUD" iform="VPMAXUD_XMMdq_XMMdq_XMMdq" isa-set="AVX" string="VPMAXUD (XMM, XMM, XMM)" summary="Maximum of Packed Unsigned Integers" url="uops.info/html-instr/VPMAXUD_XMM_XMM_XMM.html" url-ref="felixcloutier.com/x86/PMAXUD:PMAXUQ.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" name="REG2" r="1" type="reg" width="128" xtype="u32">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <architecture name="SNB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="IVB">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="HSW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" latency="1" ports="1*p15" uops="1" version="2.1"/>
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p15" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="1.00" available_simple_decoders="2" complex_decoder="1" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="BDW">
        <IACA TP="0.50" TP_no_interiteration="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.2"/>
        <IACA TP="0.50" TP_ports="0.50" ports="1*p15" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p15" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p15" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKL">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="SKX">
        <IACA TP="0.50" TP_ports="0.50" ports="1*p01" uops="1" version="2.3"/>
        <IACA TP="0.49" TP_ports="0.50" ports="1*p01" uops="1" version="3.0"/>
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="KBL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CFL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CNL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="CLX">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ICL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.5" latency="1.0"/>
      </architecture>
      <architecture name="TGL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="RKL">
        <measurement TP_loop="0.50" TP_ports="0.50" TP_unrolled="0.50" ports="1*p01" uops="1" uops_MITE="1" uops_MS="0" uops_retire_slots="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
      </architecture>
      <architecture name="ZEN+">
        <measurement TP_loop="0.36" TP_ports="0.33" TP_unrolled="0.33" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/3" uops="1"/>
      </architecture>
      <architecture name="ZEN2">
        <measurement TP_loop="0.33" TP_ports="0.33" TP_unrolled="0.33" ports="1*FP013" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.50" latency="1" ports="FP0/3" uops="1"/>
      </architecture>
      <architecture name="ZEN3">
        <measurement TP_loop="0.25" TP_ports="0.25" TP_unrolled="0.31" ports="1*FP0123" uops="1">
          <latency cycles="1" start_op="2" target_op="1"/>
          <latency cycles="1" start_op="3" target_op="1"/>
        </measurement>
        <doc TP="0.25" latency="1" ports="FP0/1/2/3" uops="1"/>
      </architecture>
    </instruction>
    <instruction asm="VPMAXUW" category="AVX" cpl="3" extension="AVX" iclass="VPMAXUW" iform="VPMAXUW_XMMdq_XMMdq_MEMdq" isa-set="AVX" string="VPMAXUW (XMM, XMM, M128)" summary="Maximum of Packed Unsigned Integers" url="uops.info/html-instr/VPMAXUW_XMM_XMM_M128.html" url-ref="felixcloutier.com/x86/PMAXUB:PMAXUW.html" vex="1">
      <operand idx="1" name="REG0" type="reg" w="1" width="128" xtype="u16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="2" name="REG1" r="1" type="reg" width="128" xtype="u16">XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,XMM8,XMM9,XMM10,XMM11,XMM12,XMM13,XMM14,XMM15</operand>
      <operand idx="3" memory-prefix="xmmword ptr" name="MEM0" r="1" type="mem" width="128" xtype="u16"/>
      <architecture name="SNB">
        <IACA TP="0.50" TP_indexed="0.50" TP_no_interiteration="0.50" TP_ports="0.50" TP_ports_indexed="0.50" fusion_occurred="1" latency="7" ports="1*p15+1*p23" ports_indexed="1*p15+1*p23